metacountregressor 0.1.73__py3-none-any.whl → 0.1.78__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metacountregressor/data_split_helper.py +90 -0
- metacountregressor/helperprocess.py +115 -0
- metacountregressor/main.py +41 -69
- metacountregressor/metaheuristics.py +25 -24
- metacountregressor/solution.py +189 -628
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.78.dist-info}/METADATA +1 -1
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.78.dist-info}/RECORD +10 -9
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.78.dist-info}/WHEEL +1 -1
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.78.dist-info}/LICENSE.txt +0 -0
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.78.dist-info}/top_level.txt +0 -0
metacountregressor/solution.py
CHANGED
@@ -9,12 +9,10 @@ import math
 import os
 import random
 import sys
-import time
 import warnings
 from collections import Counter
 from functools import wraps
 
-from tempfile import TemporaryFile
 import traceback
 import latextable
 import numpy as np
@@ -35,15 +33,22 @@ from statsmodels.tools.numdiff import approx_fprime, approx_hess
 from sklearn.preprocessing import StandardScaler
 from texttable import Texttable
 
-
-from .
+try:
+    from ._device_cust import device as dev
+    from .pareto_file import Pareto, Solution
+    from .data_split_helper import DataProcessor
+except ImportError:
+    from metacountregressor._device_cust import device as dev
+    from metacountregressor.pareto_file import Pareto, Solution
+    from data_split_helper import DataProcessor
+
 
 np.seterr(divide='ignore', invalid='ignore')
 warnings.simplefilter("ignore")
 
-#
-min_comp_val = 1e-
-max_comp_val = 1e+
+# define the computation boundary limits
+min_comp_val = 1e-20
+max_comp_val = 1e+200
 log_lik_min = -1e+200
 log_lik_max = 1e+200
 
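Note on the import hunk above: wrapping the relative imports in try/except ImportError is the standard way to let a module resolve its siblings both when it is imported as part of the installed package and when it is run as a loose script with no parent package. A minimal sketch of the pattern (module names taken from the hunk; this illustrates the idiom, it is not the package's documented API):

    # Guarded-import sketch: prefer the package-relative import and fall back
    # to an absolute import when no parent package is available.
    try:
        from ._device_cust import device as dev  # works inside the package
    except ImportError:
        from metacountregressor._device_cust import device as dev  # works as a script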
@@ -117,21 +122,19 @@ class ObjectiveFunction(object):
 
     def __init__(self, x_data, y_data, **kwargs):
 
-        self.reg_penalty =
+        self.reg_penalty = 1
         self.power_up_ll = False
         self.bic = None
         self.other_bic = False
+        self.test_flag = 1
         if self.other_bic:
            print('change this to false latter ')
-        offset = None
 
-        #
+        # initialize values
        self.constant_value = -5.5
        self.negative_binomial_value = 0.05
 
        self.verbose_safe = True
-        self.zi_force = None  # Analst want a zi model and formally declares the zi components below
-        self.zi_force_names = None  # delare the zi components
        self.please_print = kwargs.get('please_print', 0)
        self.group_halton = None
        self.grad_yes = False
@@ -145,7 +148,7 @@ class ObjectiveFunction(object):
        self.rdm_fit = None
        self.rdm_cor_fit = None
        self.dist_fit = None
-
+
        self.MAE = None
        self.best_obj_1 = 100000000.0
        self._obj_1 = 'bic'
@@ -158,7 +161,7 @@ class ObjectiveFunction(object):
        self._max_iterations_improvement = 100
        self.generated_sln = set()
        self.ave_mae = 0
-        #
+        # defalt paramaters for hs #TODO unpack into harmony search class
        self.algorithm = 'hs'  # 'sa' 'de' also avialable
        self._hms = 20
        self._max_time = 60 * 60 * 24
@@ -166,7 +169,7 @@ class ObjectiveFunction(object):
        self._par = 0.3
        self._mpai = 1
        self._max_imp = 100000
-        self._WIC = 1000  # Number of
+        self._WIC = 1000  # Number of Iterations without Multiobjective Improvement
        self._panels = None
        self.is_multi = True
        self.method_ll = 'Nelder-Mead-BFGS'
@@ -190,11 +193,6 @@ class ObjectiveFunction(object):
            if k in acceptable_keys_list:
                self.__setattr__(k, self.tryeval(kwargs[k]))
 
-        if self.zi_force_names is not None:
-            self.zi_force = True
-            if 'const' not in self.zi_force_names:
-                self.zi_force_names = ['const'] + self.zi_force_names
-                print('did this work?')
 
        if 'complexity_level' in kwargs:
            self.complexity_level = kwargs['complexity_level']
@@ -211,17 +209,22 @@ class ObjectiveFunction(object):
            raise Exception
 
        self.pvalue_penalty = float(kwargs.get('pvalue_penalty', 0.5))
-
        self.pvalue_exceed = 0
        self._maximize = False  # do we maximize or minimize?
-
-        # data_names = self._random_forest_preprocess(x_data, y_data)
+
        x_data = sm.add_constant(x_data)
        self._input_data(x_data, y_data)
+
+
        if y_data.ndim == 1:
            y_data = pd.DataFrame(y_data)
 
-
+        '''
+        #TODO ADD THIS IN LATER
+        splitter = DataProcessor(x_data, y_data, kwargs)
+        self.copy_class_attributes(splitter) #inherit the self objects
+        '''
+
        if self._obj_1 == 'MAE' or self._obj_2 in ["MAE", 'RMSE', 'MAE', 'MSE', 'RMSE_IN', 'RMSE_TEST']:
            self.test_percentage = float(kwargs.get('test_percentage', 0))
            self.val_percentage = float(kwargs.get('val_percentage', 0))
@@ -229,8 +232,7 @@ class ObjectiveFunction(object):
            self.is_multi = False
 
        if 'panels' in kwargs:
-            self.group_names = np.asarray(x_data[kwargs['group']].astype(
-                'category').cat._parent.dtype.categories)
+            self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)
 
            x_data[kwargs['group']] = x_data[kwargs['group']].astype(
                'category').cat.codes
@@ -243,58 +245,39 @@ class ObjectiveFunction(object):
 
                N = len(np.unique(x_data[kwargs['panels']].values))
                id_unique = np.unique(x_data[kwargs['panels']].values)
-
            except KeyError:
                N = len(np.unique(x_data[kwargs['panels']]))
+                id_unique = np.unique(x_data[kwargs['panels']].values)
 
            training_size = int((1 - self.test_percentage - self.val_percentage) * N)
            ids = np.random.choice(N, training_size, replace=False)
            ids = id_unique[ids]
            train_idx = [ii for ii, id_val in enumerate(x_data[kwargs['panels']]) if id_val in ids]
            test_idx = [ii for ii, id_val in enumerate(x_data[kwargs['panels']]) if id_val not in ids]
-
            df_train = x_data.loc[train_idx, :]
            df_test = x_data.loc[test_idx, :]
            y_train = y_data.loc[train_idx, :]
            y_test = y_data.loc[test_idx, :]
-
        else:
            N = len(x_data)
            training_size = int((1 - self.test_percentage - self.val_percentage) * N)
            ids = np.random.choice(N, training_size, replace=False)
            id_unique = np.array([i for i in range(N)])
            ids = id_unique[ids]
-
            train_idx = [ii for ii in range(len(id_unique)) if id_unique[ii] in ids]
            test_idx = [ii for ii in range(len(id_unique)) if id_unique[ii] not in ids]
+            df_train = x_data.loc[train_idx, :]
+            df_test = x_data.loc[test_idx, :]
+            y_train = y_data.loc[train_idx, :]
+            y_test = y_data.loc[test_idx, :]
 
-            try:  # @IgnoreException
-                df_train = x_data.loc[train_idx, :]
-                df_test = x_data.loc[test_idx, :]
-                y_train = y_data.loc[train_idx, :]
-                y_test = y_data.loc[test_idx, :]
-            except:
-                # Convert all values to their real parts
-                df_real = x_data.select_dtypes(include=[np.number]).apply(np.real)
-
-                # Replace the original DataFrame's numerical columns with real-valued ones
-                x_data[df_real.columns] = df_real
-
-                df_train = x_data.iloc[train_idx, :]
-                df_test = x_data.iloc[test_idx, :]
-                y_train = y_data.iloc[train_idx, :]
-                y_test = y_data.iloc[test_idx, :]
 
-        self.n_obs = N
+        #self.n_obs = N
        self._characteristics_names = list(self._x_data.columns)
-        if self.zi_force:
-            self.alpha_hurdle = np.isin(self._characteristics_names,
-                                        [item.split(':')[0] for item in self.zi_force_names]).astype(int).tolist()
-
        self._max_group_all_means = 1
 
        exclude_this_test = [4]
-
+
        if 'panels' in kwargs:
            self.panels = np.asarray(df_train[kwargs['panels']])
            self.panels_test = np.asarray(df_test[kwargs['panels']])
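The rewritten split above samples panel IDs rather than rows, so every observation belonging to a sampled panel lands on the same side of the train/test boundary, and the duplicated loc/iloc fallback is gone. A self-contained sketch of that logic (variable names mirror the diff; this is an illustration, not the package API):

    # Panel-aware train/test split: choose panel IDs, then assign whole panels.
    import numpy as np
    import pandas as pd

    def split_by_panel(x_data: pd.DataFrame, panel_col: str, test_share: float, seed: int = 42):
        rng = np.random.default_rng(seed)
        ids = np.unique(x_data[panel_col].values)          # one entry per panel
        n_train = int((1 - test_share) * len(ids))
        train_ids = set(rng.choice(ids, n_train, replace=False))
        mask = x_data[panel_col].isin(train_ids).values    # row-level membership
        return x_data[mask], x_data[~mask]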
@@ -309,7 +292,6 @@ class ObjectiveFunction(object):
            X, Y, panel, group = self._arrange_long_format(
                df_train, y_train, self.ids, self.ids, groupll)
            self.group_halton = group.copy()
-            Y = Y.astype('float')
            self.group_dummies = pd.get_dummies(group)
            Xnew, Ynew, panel_info = self._balance_panels(X, Y, panel)
            self.panel_info = panel_info
@@ -324,7 +306,6 @@ class ObjectiveFunction(object):
            YY = Ynew.values.reshape(self.N, self.P, 1).copy()
            self._x_data = XX.copy()
            self._y_data = YY.copy()
-            # Xalt, Yalt, group_info = self._balance_panels(X, Y, group)
            X, Y, panel, group = self._arrange_long_format(df_test, y_test, self.ids_test, self.panels_test, group_test)
            if np.max(group) > 50:
                exclude_this_test = [4]
@@ -353,7 +334,7 @@ class ObjectiveFunction(object):
 
            self._samples, self._panels, self._characteristics = self._x_data.shape
 
-
+
 
        else:
            self.G = None
@@ -372,77 +353,37 @@ class ObjectiveFunction(object):
            K = Xnew.shape[1]
            self._characteristics_names = list(Xnew.columns)
            XX = Xnew.values.reshape(self.N, self.P, K).copy()
-            # self.group_dummies = self.group_dummies.values.reshape(self.N, self.P, -1)
-            # self.group_halton = self.group_halton.reshape(self.N, self.P)[:, 0]
            YY = Ynew.values.reshape(self.N, self.P, 1).copy()
            self._x_data = XX.copy()
            self._y_data = YY.copy()
-
+
            if self.is_multi:
                X, Y, panel, group = self._arrange_long_format(df_test, y_test, self.ids_test, self.ids_test, None)
                if np.max(group) > 50:
                    exclude_this_test = [4]
                else:
                    exclude_this_test = []
-                # self.group_halton_test = group.copy()
                X, Y, panel_info = self._balance_panels(X, Y, panel)
-
+
                self.N_test, self.P_test = panel_info.shape
-
-                # self.group_dummies_test = pd.get_dummies(group)
-                # self.group_dummies_test = self.group_dummies_test.values.reshape(self.N_test, self.P_test, -1)
                K = X.shape[1]
                self.columns_names = X.columns
                X = X.values.reshape(self.N_test, self.P_test, K)
-                # self.group_halton_test = self.group_halton_test.reshape(self.N_test, self.P_test)[:, 0]
                Y = Y.values.reshape(self.N_test, self.P_test, 1)
                self._x_data_test = X.copy()
                self.y_data_test = Y.copy()
-
+
            self._samples, self._panels, self._characteristics = self._x_data.shape
 
-        # draws and pvalue
-
-        if 'Offset' in self._characteristics_names:
-            offset = True
-        self.have_offset = offset
-        if self.have_offset is not None:
-            try:
-                # offset for training data
-                # define offset
-                val_od = self.get_named_indices(['Offset'])
-                self._offsets = self._x_data[:, :, val_od]
-
-                # drop the offset from the data
-                self._x_data = np.delete(self._x_data, val_od, axis=2)
-                self._characteristics_names = [x for x in self._characteristics_names if not 'Offset' in x]
-                self._characteristics = len(self._characteristics_names)
-                # self._x_data.drop(columns=['Offset'], inplace=True)
-
-                # offset for testing data
-                if self.is_multi:
-                    # define offset
-                    self._offsets_test = self._x_data_test[:, :, val_od]
-                    # self._offsets_test = self._x_data_test['Offset'].to_numpy()
-                    # self._offsets_test = np.reshape(
-                    #     self._offsets_test, (-1, 1))
-                    # drop the offset from the data
-                    self._x_data_test = np.delete(self._x_data_test, val_od, axis=2)
-                    # self._x_data_test.drop(columns=['Offset'], inplace=True)
-            except:
-                # if no offset, set as 0
-                self._offsets = np.zeros((self.N, self.P, 1))
-                if self.is_multi:
-                    self._offsets_test = np.zeros((self.N_test, self.P_test, 1))
-        else:
-            self._offsets = np.zeros((self.N, self.P, 1))
-            if self.is_multi:
-                self._offsets_test = np.zeros((self.N_test, self.P_test, 1))
 
+        #Define the offset into the data
+        self.process_offset()
        if self.is_multi:
            self.pareto_printer = Pareto(self._obj_1, self._obj_2, True)
-
            self._pareto_population = list()
+
+
+
        self.Ndraws = 200  # todo: change back
        self.draws1 = None
        self.initial_sig = 1  # pass the test of a single model
@@ -480,8 +421,7 @@ class ObjectiveFunction(object):
        self.coeff_ = None
 
        self.significant = 0
-        # define the states of our
-
+        # define the states of our explanatory variables
        self._discrete_values = self.define_alphas(self.complexity_level, exclude_this_test,
                                                   kwargs.get('must_include', []))
        self._discrete_values = self._discrete_values + \
@@ -506,21 +446,83 @@ class ObjectiveFunction(object):
        self.endog = None
        # solution parameters
        self._min_characteristics = 1
-
        self._max_hurdle = 4
 
-
+        #Manually fit from analyst specification
+        manual_fit = kwargs.get('Manual_Fit')
+        if manual_fit is not None:
+            self.process_manual_fit(manual_fit)
+
+        self.solution_analyst = None
 
-        self.initial_sig = 1  # pass the test of a single model
-        self.pvalue_sig_value = 1
-        # embed the solution to how you want it
-        self.set_defined_seed(42)
-        a = self.modify_initial_fit(kwargs['Manual_Fit'])
-        self.makeRegression(a)
 
 
-
-
+
+    def over_ride_self(self, **kwargs):
+        """
+        Dynamically sets attributes on the instance based on the provided keyword arguments.
+        """
+        for key, value in kwargs.items():
+            setattr(self, key, value)
+        print(f"Updated attributes: {kwargs}")
+
+    def remove_offset(self, data, indices):
+        """ Remove offset data from the dataset """
+        new_data = np.delete(data, indices, axis=2)
+        return new_data
+
+    def process_offset(self):
+        """ Process offset if it exists in the characteristics """
+        try:
+            if 'Offset' in self._characteristics_names:
+                self.have_offset = True
+                val_od = self.get_named_indices(['Offset'])
+                self._offsets = self._x_data[:, :, val_od]
+                self._x_data = self.remove_offset(self._x_data, val_od)
+                self._characteristics_names = [x for x in self._characteristics_names if x != 'Offset']
+                self._characteristics = len(self._characteristics_names)
+
+                if self.is_multi:
+                    self._offsets_test = self._x_data_test[:, :, val_od]
+                    self._x_data_test = self.remove_offset(self._x_data_test, val_od)
+            else:
+                self.initialize_empty_offsets()
+
+        except Exception as e:
+            print(f"An error occurred: {e}")  # Better error handling
+            self.initialize_empty_offsets()
+
+    def initialize_empty_offsets(self):
+        """ Initialize offsets to zero if none are found or on error """
+        self._offsets = np.zeros((self.N, self.P, 1))
+        if self.is_multi:
+            self._offsets_test = np.zeros((self.N_test, self.P_test, 1))
+
+
+    def copy_class_attributes(self, class_object):
+        '''
+        Loop through an
+        '''
+
+        # Loop through all attributes of the car object and copy them
+        for attr in vars(class_object):
+            setattr(self, attr, getattr(class_object, attr))
+
+
+    def process_manual_fit(self, manual_fit):
+        """Process the manual fit configuration."""
+        self.initial_sig = 1  # Example: Initialize some signal
+        self.pvalue_sig_value = 1  # Example: Initialize another signal
+        self.set_defined_seed(42)  # Set a specific seed
+
+        modified_fit = self.modify_initial_fit(manual_fit)  # Modify the initial fit based on manual_fit
+        self.makeRegression(modified_fit)  # Perform regression with the modified fit
+
+
+    def process_fit_specifications(self, find_constant, hard_code):
+        """
+        Function to for proceccing testing, and finding a suitable initial coefficient (linear intercept)
+        """
        if hard_code:
            manual_fit_spec = {
                'fixed_terms': ['Constant', 'US', 'RSMS', 'MCV'],
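The new process_offset()/remove_offset() pair replaces the inline try/except offset handling that an earlier hunk deletes: the 'Offset' column is sliced out of the (N, P, K) design array, kept as a (N, P, 1) array, and zeros are substituted when no offset exists. A standalone sketch of that extraction (the column name and shapes are assumptions for the example, not the package API):

    import numpy as np

    def extract_offset(x, names, offset_name='Offset'):
        # Split an offset column out of a 3-D design array; return zeros
        # of matching shape when the column is absent.
        if offset_name not in names:
            return x, names, np.zeros((*x.shape[:2], 1))
        k = names.index(offset_name)
        offsets = x[:, :, [k]]                 # keep the (N, P, 1) shape
        x = np.delete(x, k, axis=2)            # drop the offset column
        names = [n for n in names if n != offset_name]
        return x, names, offsets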
@@ -559,7 +561,7 @@ class ObjectiveFunction(object):
                constant_values.append(self.beta_dict['const'][0][1])
                dispersion_values.append(self.beta_dict.get(self._model_type_codes[i], [[0, 0], [0, 0]])[0][1])
            except:
-                print('
+                print('Error during regression analysis.')
            i += 1
 
        # Add the values of this iteration to the total
@@ -570,7 +572,7 @@ class ObjectiveFunction(object):
        constant_values_avg = [x / 100 for x in constant_values_total]
        dispersion_values_avg = [x / 100 for x in dispersion_values_total]
 
-
+        return constant_values_avg, dispersion_values_avg
 
 
    def _balance_panels(self, X, y, panels):  # ToDO re
@@ -615,22 +617,7 @@ class ObjectiveFunction(object):
 
        return np.nan_to_num(quad(integrand, 0, np.inf)[0], nan=0)
 
-
-        import rpy2.rinterface as rinterface
-        import rpy2.robjects as robjects
-        import rpy2.robjects as ro
-        from rpy2.robjects import pandas2ri
-        r = robjects.r
-        r['source']('testML.R')
-        pandas2ri.activate()
-        RF_function_r = robjects.globalenv['RF_plot']
-        RF_function_corr_r = robjects.globalenv['RF_plot_corr']
-        r_df = ro.conversion.py2rpy(self._x_data)
-        y_dy = ro.conversion.py2rpy(self._y_data)
-        RF_function_r(r_df, y_dy)
-
-        print('did this work')
-        RF_function_corr_r(r_df, y_dy)
+
 
    def print_system_utilization(self):
        # Get CPU usage
@@ -647,7 +634,8 @@ class ObjectiveFunction(object):
        mem_free = round(mem_info.available /
                         (1024 * 1024), 2)  # Convert to MB
        print(
-            f"Current memory usage: {mem_percent}% ({mem_used} MB used / {mem_total} MB total
+            f"Current memory usage: {mem_percent}% ({mem_used} MB used / {mem_total} MB total / "
+            f" mem free {mem_free})")
 
    def _arrange_long_format(self, X, y, ids, panels=None, groups=None):
        '''converts the data to long format'''
@@ -665,59 +653,14 @@ class ObjectiveFunction(object):
            if group is not None:
                group = group[sorted_idx]
 
-            return X, y, pnl, group
-
-        pandas_sort = 1
-        if pandas_sort:
-            if ids is not None:
-
-                pnl = panels if panels is not None else np.ones(len(ids))
-                df = X
-
-                df['panels'], df['ids'] = pnl, ids
-                new = 0
-                if new:
-                    cols = np.zeros(len(ids), dtype={'names': ['panels', 'ids'],
-                                                     'formats': ['<f4', '<f4']})
-                    cols['panels'], cols['ids'] = pnl, ids
-                    sorted_idx = np.argsort(cols, order=['panels', 'ids'])
-                    X, y = X[sorted_idx], y[sorted_idx]
-                    if panels is not None:
-                        panels = panels[sorted_idx]
-                    return X, y, panels
-
-                df = pd.concat([X.reset_index(drop=True),
-                                y.reset_index(drop=True)], axis=1)
-                sorted_df = df.sort_values(
-                    ['panels', 'ids']).reset_index(drop=True)
-
-                X, y, panels = sorted_df.iloc[:, :-
-                                              3], sorted_df.iloc[:, -3:-2], sorted_df.iloc[:, -2]
-                if panels is not None:
-                    # panels = panels[sorted_idx]
-                    P_i = (
-                        (np.unique(panels, return_counts=True)[1])).astype(int)
-                    P = np.max(P_i)
-                    N = len(P_i)
-                print(1)
-                return X, y, panels
-
-        if ids is not None:
-            X = np.asarray(X)
-            y = np.asarray(y)
-            pnl = panels if panels is not None else np.ones(len(ids))
-
-            cols = np.zeros(len(ids), dtype={'names': ['panels', 'ids'],
-                                             'formats': ['<f4', '<f4']})
-            cols['panels'], cols['ids'] = pnl, ids
-            sorted_idx = np.argsort(cols, order=['panels', 'ids'])
-            X, y = X[sorted_idx], y[sorted_idx]
-            if panels is not None:
-                panels = panels[sorted_idx]
+            return X, y.astype('float'), pnl, group
 
-        return X, y, panels
+        return X, y.astype('float'), panels
 
    def _random_forest_identify_transformations(self, x_data, y_data):
+        '''
+        use the random forrest model to identify best feature
+        '''
        # let's use the pprint module for readability
        import inspect
        from pprint import pprint
@@ -866,7 +809,6 @@ class ObjectiveFunction(object):
    def pvalue_asterix_add(self, pvalues):
        pvalue_ast = list()
        for i in range(len(pvalues)):
-            signif = ""
            if float(pvalues[i]) < 0.001:
                signif = "***"
            elif float(pvalues[i]) < 0.01:
@@ -911,8 +853,7 @@ class ObjectiveFunction(object):
            rdm_fit = self.none_handler(self.rdm_fit)
        if rdm_cor_fit is None:
            rdm_cor_fit = self.none_handler(self.rdm_cor_fit)
-
-        zi_fit = self.none_handler(self.zi_fit)
+
        dis_fit = [x for x in self.none_handler(
            self.dist_fit)]  # check if dis fit is name
 
@@ -977,18 +918,18 @@ class ObjectiveFunction(object):
            br_w_names = [randvars[i] + " (Std. Dev.) " + rand_vars_dis[i]
                          for i in range(len(randvars))]
 
-
+
 
            names = fixednames + randvars + chol_names + \
-                br_w_names + chol_part_1 + chol +
+                br_w_names + chol_part_1 + chol + hetro_long + dispersion_name
            self.name_deleter = fixednames + randvars + chol_names + randvars + [chol_names[i] for i
                                                                                 in range(len(chol_names)) for j in
                                                                                 range(
-                                                                                     i + 1)]
+                                                                                     i + 1)] + dispersion_name  # TODO does this break
            name_delete_2 = fixednames + randvars + chol_names + randvars + [chol_names[i] + "/" +
                                                                             chol_names[j] for i
                                                                             in range(len(chol_names)) for j in
-                                                                             range(i + 1)]
+                                                                             range(i + 1)] + dispersion_name
            index_dict = {}
            for i, name in enumerate(name_delete_2):
                split_names = name.split('/')
@@ -1012,9 +953,9 @@ class ObjectiveFunction(object):
            randvars = [x for x in self.none_handler(rdm_fit)]
            chol_names = [x for x in self.none_handler(rdm_cor_fit)]
 
-            zi_names = [x + ': inflated' for x in self.none_handler(self.zi_force_names)]
 
-
+
+            names = fixednames + randvars + chol_names + big_hetro + dispersion_name
 
            names = np.array(names)  # TODO check order
            self.print_transform = self.transform_id_names + \
@@ -1052,22 +993,8 @@ class ObjectiveFunction(object):
        if not isinstance(self.pvalues, np.ndarray):
            raise Exception
 
-        for i in range(len(self.coeff_)):
-            signif = ""
 
-            if float(self.pvalues[i]) < 0.01:
-                signif = "***"
-            elif float(self.pvalues[i]) < 0.05:
-                signif = "**"
-            elif float(self.pvalues[i]) < 0.1:
-                signif = "*"
 
-            '''
-            print(fmt.format(self.coeff_names[i][:coeff_name_str_length], self.print_transform[i], self.coeff_[i],
-                             self.stderr[i], self.zvalues[i], self.pvalues[i],
-                             signif
-                             ))
-            '''
        if self.please_print or save_state:
 
            if self.convergance is not None:
@@ -1175,14 +1102,7 @@ class ObjectiveFunction(object):
            self.save_to_file(latextable.draw_latex(
                table, caption=caption, caption_above=True), file_name)
 
-        # print('change this')
-        # df = pd.read_csv("artificial_mixed_corr_2023_MOOF.csv")
-
-        # updating the column value/data
-        # df['Y'] = np.mean(self.lam, axis = (1,2))
 
-        # writing into the file
-        # df.to_csv("artificial_mixed_corr_2023_MOOF.csv", index=False)
 
    def summary(self, model=None, algorithm=None, transform_list=None, long_print=0, solution=None):
        """
@@ -1540,22 +1460,9 @@ class ObjectiveFunction(object):
        alpha_hetro = [
            0 if x != 5 else 1 for x in vector[:self._characteristics]]
 
-        if self.zi_force == True:
 
-            return {
-                'alpha': alpha,
-                'alpha_rdm': alpha_rdm,
-                'alpha_cor_rdm': alpha_cor_rdm,
-                'alpha_grouped': alpha_grouped,
-                'alpha_hetro': alpha_hetro,
-                'distributions': distributions,
-                'transformations': transformations,
-                'exog_infl': self.zi_force_names,
-                'dispersion': dispersion
-            }
 
-
-        return {
+        return {
            'alpha': alpha,
            'alpha_rdm': alpha_rdm,
            'alpha_cor_rdm': alpha_cor_rdm,
@@ -1563,7 +1470,6 @@ class ObjectiveFunction(object):
            'alpha_hetro': alpha_hetro,
            'distributions': distributions,
            'transformations': transformations,
-
            'dispersion': dispersion
        }
 
@@ -1599,7 +1505,7 @@ class ObjectiveFunction(object):
 
    def repair(self, vector, reduce_to_this=10000):  # todo get the number of parameters
        'Method to repair the model so that the number of paramaters is held within the constraint'
-
+
        new_j = 0
        # extract explanatory vector
        prmVect = vector[:self._characteristics]
@@ -1618,7 +1524,6 @@ class ObjectiveFunction(object):
                             int(np.min((5, self.complexity_level - 1)))])
 
        count_3 = prmVect.count(3)
-        this_many = count_3 * (count_3 + 1) / 2
 
        vector[:len(prmVect)] = prmVect.copy()
 
@@ -1637,8 +1542,7 @@ class ObjectiveFunction(object):
        # b = sum(prmVect) + self.is_dispersion(vector[-1])
        max_loops = 100  # Maximum number of loops
        counter = 0  # Counter variable to keep track of the number of loops
-
-        raise Exception('fhfhfhf')
+
 
        while b > self._max_characteristics and counter < max_loops or b > reduce_to_this:
 
@@ -1686,8 +1590,6 @@ class ObjectiveFunction(object):
            counter += 1
 
        counter = 0
-        if any(isinstance(num, int) and num < 0 for num in vector):
-            raise Exception('fhfhfhf')
        while b < self._min_characteristics and counter < max_loops:
 
            weights = [1 if x == 0 else 0 for x in only_ints_vals]
@@ -1734,13 +1636,13 @@ class ObjectiveFunction(object):
        cor_l = 0 if self.rdm_cor_fit is None else len(self.rdm_cor_fit)
        Kh = 0 if self.hetro_fit is None else len(self.hetro_fit) + len(set(self.dist_hetro))
 
-
+
        Kchol = int((cor_l *
                     (cor_l + 1)) / 2)
        n_coeff = Kf + Kr + cor_l + Kchol + Kr_b + Kh
        if block:
-            return [Kf, Kr, cor_l, Kr_b, Kchol, Kh
-        return Kf, Kr, cor_l, Kr_b, Kchol, Kh
+            return [Kf, Kr, cor_l, Kr_b, Kchol, Kh]
+        return Kf, Kr, cor_l, Kr_b, Kchol, Kh
 
    def find_index_of_block(self, lst, value):
        cumulative_sum = 0
@@ -1821,8 +1723,7 @@ class ObjectiveFunction(object):
                self.rdm_fit)):
            raise Exception('pop wrong for id names')
 
-
-        # self.rdm_cor_fit.pop(self.name_deleter(idx))
+
 
    def get_value_to_delete(self, idx, dispersion):
        block = self.get_num_params(True)
@@ -1858,8 +1759,7 @@ class ObjectiveFunction(object):
            self.dist_fit.pop(cc[b] + len(self.rdm_fit))
            self.transform_id_names.pop(
                cc[b] + len(self.none_handler(self.fixed_fit)) + len(self.none_handler(self.rdm_fit)))
-
-        # self.rdm_cor_fit.pop(self.name_deleter(idx))
+
 
    def get_param_num(self, dispersion=0):
        a = np.sum(self.get_num_params()) + \
@@ -1890,7 +1790,7 @@ class ObjectiveFunction(object):
                           return_violated_terms=0):
 
        num_params = len(pvalues)
-        Kf, Kr, Kc, Kr_b, Kchol, Kh
+        Kf, Kr, Kc, Kr_b, Kchol, Kh = self.get_num_params()
 
        vio_counts = 0
        pvalues = np.array([float(string) for string in pvalues])
@@ -1915,18 +1815,14 @@ class ObjectiveFunction(object):
                    subpvalues[i] = 0
 
        sum_k += Kr_b
-        if Kchol > 0:
-            cc = [i for i
-                  in range(len(self.rdm_cor_fit)) for j in range(i + 1)]
-
        lower_triangular = subpvalues[sum_k:sum_k + Kchol]
 
-
+
        # initialize matrix with zeros
-        matrix_alt = [[0] *
+        matrix_alt = [[0] * Kc for _ in range(Kc)]
        index = 0
 
-        for i in range(
+        for i in range(Kc):
            for j in range(i + 1):
                # fill in lower triangular entries
                matrix_alt[i][j] = lower_triangular[index]
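The loop in this hunk rebuilds a Kc x Kc lower-triangular matrix from the flat slice of Cholesky p-values, which works because a lower triangle including its diagonal holds exactly Kc*(Kc+1)/2 entries. The same mapping as a standalone function (a sketch of one reading of the loop, not package code):

    def unflatten_lower_triangular(flat, kc):
        # Map a flat, row-major lower triangle back to a kc x kc matrix.
        assert len(flat) == kc * (kc + 1) // 2
        matrix = [[0] * kc for _ in range(kc)]
        index = 0
        for i in range(kc):
            for j in range(i + 1):
                matrix[i][j] = flat[index]
                index += 1
        return matrix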
@@ -2414,17 +2310,9 @@ class ObjectiveFunction(object):
 
        if obj_1 is not None:
            obj_1['layout'] = vector.copy()
-            # alpha, alpha_rdm, alpha_cor_rdm = self.modify(
-            #     obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'])
-            # a = self.modifyn(model_mod)
-            # vector = self.modify_vector(
-            #     vector, alpha, alpha_rdm, alpha_cor_rdm)
            sub_vector = vector[:self._characteristics]
            dispersion_parm = vector[-1]
-
-            num_parm = sum(sub_vector)
-        else:
-            num_parm = sum(sub_vector) + 1
+
 
        if not self.is_quanitifiable_num(obj_1[self._obj_1]):
            obj_1[self._obj_1] = 10 ** 9
@@ -2457,7 +2345,7 @@ class ObjectiveFunction(object):
 
            self.Last_Sol = obj_1.copy()
 
-
+
 
        self.reset_sln()
        if not self.is_quanitifiable_num(obj_1[self._obj_1]):
@@ -2495,7 +2383,7 @@ class ObjectiveFunction(object):
        self.coeff_names = None
        self.draws1 = None
        self.coeff_ = None
-
+
        self.bic = None
        self.log_lik = None
        self.pvalues = None
@@ -2589,13 +2477,13 @@ class ObjectiveFunction(object):
    def set_defined_seed(self, seed):
        print('Benchmaking test with Seed', seed)
        np.random.seed(seed)
-
+
        random.seed(seed)
 
    def set_random_seed(self):
        print('Imbdedding Seed', self._random_seed)
        np.random.seed(self._random_seed)
-
+
        random.seed(self._random_seed)
        return self._random_seed
 
@@ -2720,85 +2608,9 @@ class ObjectiveFunction(object):
            print(e)
            print('f')
 
-    def negbinom_gradients(r, p, k, a=None):  # TODO: delete if wrong
-        """_summary_
 
-        Args:
-            r (_type_): rate paramaters or dispersion of the nb
-            p (_type_): probability
-            k (_type_): vector of (non-negative integer) quantiles.
-            a (_type_, optional): optional paramater, if none NB model, otherwise NB-Lindley model with Lindley paramater a.
 
-        Raises:
-            Exception: _description_
-            Exception: _description_
-            ValueError: _description_
-            Exception: _description_
-            Exception: _description_
-
-        Returns:
-            _type_: _description_
-        """
-        # fine the NegBinom PMF
-        import scipy.special as sps
-        negbinom_pmf = sps.comb(k + r - 1, k) * p ** r * (1 - p) ** k
 
-        # Calculate the gradient of the NegBinom PMF with respect to r and p
-        d_negbinom_pmf_dr = sps.comb(
-            k + r - 1, k) * (np.log(p) - sps.digamma(r)) * p ** r * (1 - p) ** k
-        d_negbinom_pmf_dp = sps.comb(
-            k + r - 1, k) * (r / p - (k + r) / (1 - p)) * p ** r * (1 - p) ** k
-
-        if a is not None:
-            # Define the NegBinom-Lindley PMF
-            negbinom_lindley_pmf = sps.comb(a + k - 1, k) * p ** r * (1 - p) ** k
-
-            # Calculate the gradient of the NegBinom-Lindley PMF with respect to r, p, and a
-            d_negbinom_lindley_pmf_dr = sps.comb(
-                a + k - 1, k) * (np.log(p) * p ** r * (1 - p) ** k)
-            d_negbinom_lindley_pmf_dp = sps.comb(
-                a + k - 1, k) * ((r / p - (k + r) / (1 - p)) * p ** r * (1 - p) ** k)
-            d_negbinom_lindley_pmf_da = sps.comb(
-                a + k - 1, k) * (-sps.digamma(a) + sps.digamma(a + k)) * p ** r * (1 - p) ** k
-
-            return [d_negbinom_pmf_dr, d_negbinom_pmf_dp], [d_negbinom_lindley_pmf_dr, d_negbinom_lindley_pmf_dp,
-                                                            d_negbinom_lindley_pmf_da]
-        else:
-            return [d_negbinom_pmf_dr, d_negbinom_pmf_dp]
-
-    def f(self, x, N, sig, mu):
-        return norm.pdf(x, 0, 1) * poisson.pmf(N, np.exp(x * sig + mu))
-
-    def poilog(self, n, mu, sig):
-        from scipy import integrate
-        if len(mu) > 1 or len(sig) > 1:
-            raise ValueError(
-                "vectorization of mu and sig is currently not implemented")
-        if any((n[n != 0] / np.trunc(n[n != 0])) != 1):
-            raise ValueError("all n must be integers")
-        if any(n < 0):
-            raise ValueError("one or several values of n are negative")
-        if not np.all(np.isfinite(np.concatenate((mu, sig)))):
-            raise ValueError("all parameters should be finite")
-        if sig <= 0:
-            raise ValueError("sig is not larger than 0")
-        spos = np.where(n < 8)[0]
-        lpos = np.where(n >= 8)[0]
-        val = np.empty_like(n)
-
-        if spos.size > 0:
-            vali = np.empty(spos.size)
-            for i in range(spos.size):
-                try:
-                    vali[i] = integrate.quad(
-                        self.f, -np.inf, np.inf, sig, mu, args=(n[spos[i]],))[0]
-                except:
-                    vali[i] = 1e-300
-            valp = self.poilog(n[spos], mu, sig ** 2)[0]
-            val[spos] = np.maximum(vali, valp)
-        if lpos.size > 0:
-            val[lpos] = self.poilog(n[lpos], mu, sig ** 2)[0]
-        return val
 
    def negbinom_pmf(self, r, p, k, a=None):  # TODO: delete if wrong
        """_summary_
@@ -2828,45 +2640,7 @@ class ObjectiveFunction(object):
            negbinom_lindley_pmf = sc.comb(a + k - 1, k) * p ** r * (1 - p) ** k
            return negbinom_lindley_pmf
 
-    def nbl_score(self, y, X, betas, alpha, theta):
-        from scipy.special import gammaln, psi
-        """
-        Calculate the Negative Binomial-lindley model score vector of the log-likelihood.
-
-        Parameters:
-        -----------
-        y : numpy array
-            The dependent variable of the model.
-        X : numpy array
-            The independent variables of the model.
-        beta : numpy array
-            The coefficients of the model.
-        alpha : float
-            The dispersion parameter of the Negative Binomial-lindley distribution.
-        theta : float
-            The theta parameter of the Negative Binomial-lindley distribution.
-
-        Returns:
-        --------
-        score : numpy array
-            The score vector of the Negative Binomial-lindley model log-likelihood.
-        """
-        alpha = betas[-1]
-        theta = betas[-2]
-        beta = betas[:-2]
-        zi = self.my_lindley(y, theta).ravel()
-
-        eta = np.dot(X, beta)
-        mu = np.exp(eta) * zi
-        p = 1 / (1 + mu * theta / alpha)
-        q = 1 - p
-        score = np.zeros(len(betas))
 
-        for i in range(len(y)):
-            score += (psi(y[i] + zi[i] * p[i]) - psi(alpha * p[i]) + np.log(zi[i])
-                      - np.log(1 + zi * mu[i] / alpha)) * X[i, :]
-
-        return score
 
    def poisson_lognormal_glm_score(self, betas, Y, X, sigma, tau=1e-6):
        """
@@ -3918,195 +3692,7 @@ class ObjectiveFunction(object):
        # np.sum(stats.poisson.logpmf(endog, np.exp(XB)))
        return -np.exp(XB) + y * XB - sc.gammaln(y + 1)
 
-    def loglik_zi(params, return_grad=False):
-        """
-        Loglikelihood for observations of Generic Zero Inflated model.
-
-        Parameters
-        ----------
-        params : array_like
-            The parameters of the model.
-
-        Returns
-        -------
-        loglike : ndarray
-            The log likelihood for each observation of the model evaluated
-            at `params`. See Notes for definition.
-
-        Notes
-        -----
-        .. math:: \\ln L=\\ln(w_{i}+(1-w_{i})*P_{main\\_model})+
-            \\ln(1-w_{i})+L_{main\\_model}
-        where P - pdf of main model, L - loglike function of main model.
-
-        for observations :math:`i=1,...,n`
-        """
-        params_infl = params[:self.k_inflate]
-        params_main = params[self.k_inflate:]
-
-        y = self.endog
-        w = predict_logit(params_infl, exog_infl)
-
-        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
-        llf_main = loglik_obs_poisson(params_main, y)
-        dispersion = 0
-        b_gam = None
-        Xd = exog
-        eta = np.tile(np.dot(Xd, params_main), (1, 1)).transpose()
-        eVd = np.exp(np.clip(eta, None, EXP_UPPER_LIMIT))
-
-        llf_main_og = self.loglik_obs(y, eVd.ravel(), dispersion, b_gam)
-        zero_idx = np.nonzero(y == 0)[0]
-        nonzero_idx = np.nonzero(y)[0]  # type: ignore
-
-        llf = np.zeros_like(y, dtype=np.float64)
-        llf[zero_idx] = (np.log(w[zero_idx] +
-                                (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
-        llf[nonzero_idx] = np.log(
-            1 - w[nonzero_idx]) + llf_main[nonzero_idx]
-        if return_grad:
-            score_main = Xd.T @ (y - eVd.ravel())
-            L = np.exp(np.dot(Xd, params_main))
-            score_main = (self.endog - L)[:, None] * Xd
-
-            dldp = np.zeros(
-                (exog.shape[0], len(params_main)), dtype=np.float64)
-            dldw = np.zeros_like(exog_infl, dtype=np.float64)
-
-            dldp[zero_idx, :] = (score_main[zero_idx].T *
-                                 (1 - (w[zero_idx]) / np.exp(llf[zero_idx]))).T
-            dldp[nonzero_idx, :] = score_main[nonzero_idx]
-
-            dldw[zero_idx, :] = (exog_infl[zero_idx].T * w[zero_idx] *
-                                 (1 - w[zero_idx]) *
-                                 (1 - np.exp(llf_main[zero_idx])) /
-                                 np.exp(llf[zero_idx])).T
-            dldw[nonzero_idx, :] = -(exog_infl[nonzero_idx].T *
-                                     w[nonzero_idx]).T
-
-            return llf, np.hstack((dldw, dldp)).sum(axis=0)
-
-        else:
-
-            return llf
-
-    def zipoisson_logpmf(x, mu, w):
-        return _lazywhere(x != 0, (x, mu, w),
-                          (lambda x, mu, w: np.log(1. - w) + x * np.log(mu) -
-                           sc.gammaln(x + 1.) - mu),
-                          np.log(w + (1. - w) * np.exp(-mu)))
 
-    def zipoisson_pmf(x, mu, w):
-        return np.exp(zipoisson_logpmf(x, mu, w))
-
-    def loglik_logit(params, endog_y, exog_x):  # this is predict I think
-        q = 2 * endog_y - 1
-        X = exog_x
-        return np.sum(np.log(cdf(q * np.dot(X, params))))
-
-    def predict_logit(params, exog=None, linear=False):
-        if exog is None:
-            exog = self.exog
-        if not linear:
-            return (cdf(np.dot(exog, params)))
-        else:
-            return (np.dot(exog, params))
-
-    def cdf(X):
-        """
-        The logistic cumulative distribution function
-
-        Parameters
-        ----------
-        X : array_like
-            `X` is the linear predictor of the logit model. See notes.
-
-        Returns
-        -------
-        1/(1 + exp(-X))
-
-        Notes
-        -----
-        In the logit model,
-
-        .. math:: \\Lambda\\left(x^{\\prime}\\beta\\right)=
-            \\text{Prob}\\left(Y=1|x\\right)=
-            \\frac{e^{x^{\\prime}\\beta}}{1+e^{x^{\\prime}\\beta}}
-        """
-        X = np.asarray(X)
-        return 1 / (1 + np.exp(-X))
-
-        llobs, grad = loglik_zi(betas, return_grad)
-        llf = np.sum(llobs)
-        if return_grad:
-            return -llf, -grad
-        else:
-            return -llf
-
-    def cdf_logit(self, X):
-        """
-        The logistic cumulative distribution function
-
-        Parameters
-        ----------
-        X : array_like
-            `X` is the linear predictor of the logit model. See notes.
-
-        Returns
-        -------
-        1/(1 + exp(-X))
-
-        Notes
-        -----
-        In the logit model,
-
-        .. math:: \\Lambda\\left(x^{\\prime}\\beta\\right)=
-            \\text{Prob}\\left(Y=1|x\\right)=
-            \\frac{e^{x^{\\prime}\\beta}}{1+e^{x^{\\prime}\\beta}}
-        """
-        X = np.asarray(X)
-        return 1 / (1 + np.exp(-X))
-
-    def predict_logit_part(self, params_infl, exog_infl, linear=False):
-
-        if not linear:
-            return (self.cdf_logit(np.dot(exog_infl, params_infl)))
-        else:
-            return (np.dot(exog_infl, params_infl))
-
-    def ZeroInflate_W_setup(self, exog_infl, y, params_infl):
-
-        w = self.predict_logit_part(params_infl, exog_infl)
-
-        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
-
-        llf_main = [1, 2, 3]  # TODO ge
-        zero_idx = np.nonzero(y == 0)[0]
-        nonzero_idx = np.nonzero(y)[0]
-
-        llf = np.zeros_like(y, dtype=np.float64)
-        llf[zero_idx] = (np.log(w[zero_idx] +
-                                (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
-        llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
-
-        return llf
-
-    def dPXL(self, x, alpha):
-        return ((alpha ** 2) * (x + 3 * alpha + (alpha ** 2) + 3)) / (1 + alpha) ** (4 + x)
-
-    # Define the gradient function
-
-    def poisson_lindley_gradient(self, params, exog, endog):
-        beta = params[-1]
-        mu = np.exp(np.dot(exog, params[:-1]))
-        q = beta / (1 + beta)
-        d_beta = (endog.ravel() + 1) / (mu + endog.ravel() + 1) - q / (1 - q)
-        d_beta = self.dpoisl(endog, beta).ravel()
-        d_mu = np.dot((endog - mu) * (1 - q) / (mu + endog + 1), exog)
-
-        grad_n = np.concatenate((d_mu, np.atleast_2d(d_beta).T), axis=1)
-        der = np.sum(grad_n, axis=0)
-        return der, grad_n
 
    def dpoisl(self, x, theta, log=False):
        # if theta < 0:
@@ -4183,21 +3769,9 @@ class ObjectiveFunction(object):
 
            proba_r = self.general_poisson_pmf(eVd, y, b_gam)
 
-        elif dispersion == 3:
-            fa, ba = self.get_dispersion_paramaters(betas, dispersion)
-            zi = self.my_lindley(y, ba)
-            proba_r = poisson.pmf(y, zi * eVd.ravel())
-            # proba_r = self.lindl_pmf_chatgpt(y, l_pam)
-            # prob_2 = self.dpoisl(y, l_pam)
-            # proba_r = self.poisson_lindley_pmf(eVd, l_pam, y)
-            # proba_r = self.poisson_lindley_pmf(eVd, l_pam, y)
-            # print(1)
-            # proba_r = self.dpoisl(y, eVd)
 
-
-
-            self.zi = self.my_lindley(eVd, ba)
-            proba_r = self._nonlog_nbin(y, eVd + self.zi, b_gam)
+
+
            # proba_r = self._nonlog_nbin_lindley(y, eVd, fa, ba)
 
        elif dispersion == 'poisson_lognormal':
@@ -4267,7 +3841,7 @@ class ObjectiveFunction(object):
        # if (len(betas) -Kf-Kr-self.is_dispersion(dispersion)) != (Kchol + Kr):
 
        # gr_f, gr_u, gr_s = np.zeros((N, Kf)), np.zeros((N, Kr)), np.zeros((N, len(betas) -Kf-Kr-self.is_dispersion(dispersion))) #FIX
-        Kf2, Kr, Kc, Kr_b, Kchol, Kh
+        Kf2, Kr, Kc, Kr_b, Kchol, Kh = self.get_num_params()
 
        gr_f, gr_u, gr_s = np.zeros((N, Kf)), np.zeros(
            (N, Kr + Kc)), np.zeros((N, Kchol + Kr_b))
@@ -4716,8 +4290,10 @@ class ObjectiveFunction(object):
        return self._loglik_gradient(self, betas, *stuff)
 
    def get_br_and_bstd(betas, self):
-        Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh
-
+        Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh = self.get_num_params()
+        Kr = Kr_a + Kr_c  #todo check if this works
+        print('check if this works')
+        br = betas[Kf_a:Kf_a + Kr]
        # Calculate the size of the br matrix
        br_size = int((1 + np.sqrt(1 + 8 * Kr_b_a)) / 2)
 
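The br_size formula in the hunk above, (1 + sqrt(1 + 8*Kr_b_a)) / 2, inverts a triangular-number relation: if Kr_b_a = n*(n-1)/2 (the strictly lower-triangular count of an n x n matrix), solving the quadratic for n gives exactly that expression. A quick standalone check of this reading (my interpretation of the code, not a documented identity of the package):

    import math

    def invert_strict_triangle(m: int) -> int:
        # Recover n from m = n*(n-1)/2.
        n = int((1 + math.sqrt(1 + 8 * m)) / 2)
        assert n * (n - 1) // 2 == m, "m is not of the form n*(n-1)/2"
        return n

    assert invert_strict_triangle(3) == 3   # a 3x3 matrix has 3 strict lower entries
    assert invert_strict_triangle(6) == 4   # a 4x4 matrix has 6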
@@ -4728,7 +4304,7 @@ class ObjectiveFunction(object):
        index = 0
        for i in range(br_size):
            for j in range(i, br_size):
-                br_std[j, i] = betas[
+                br_std[j, i] = betas[Kf_a + Kr + index]
                index += 1
 
        brstd = br_std
@@ -4767,7 +4343,7 @@ class ObjectiveFunction(object):
        self.n_obs = len(y)  # feeds into gradient
        if draws is None and draws_grouped is None and (
                'draws_hetro' not in model_nature or model_nature.get('draws_hetro').shape[1] == 0):
-
+            #TODO do i shuffle the draws
            if type(Xd) == dict:
                N, Kf, P = 0, 0, 0
                for key in Xd:
@@ -4781,7 +4357,7 @@ class ObjectiveFunction(object):
            Bf = betas[0:Kf]  # Fixed betas
 
            main_disper, lindley_disp = self.get_dispersion_paramaters(
-                betas, dispersion)
+                betas, dispersion)  #todo fix this up
            if lindley_disp is not None:
                if lindley_disp <= 0:
                    penalty += 1
@@ -4808,29 +4384,13 @@ class ObjectiveFunction(object):
            # llf_main = np.clip(llf_main, log_lik_min, log_lik_max)
 
            loglik = llf_main.sum()
-            if 'exog_infl' in model_nature:
-                params_infl = betas[Kf:Kf + len(model_nature.get('exog_infl'))]
-                params_main = Bf
-                # ones = np.ones((model_nature.get('exog_inflX').shape[0], model_nature.get('exog_inflX').shape[1], 1))
-                # exog_infl = np.concatenate((ones, model_nature.get('exog_inflX')), axis =2 )
-                exog_infl = model_nature.get('exog_inflX')
-                llf_main = llf_main  # TODO test this
-                w = self.predict_logit_part(params_infl, exog_infl)
 
-                w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
-
-                zero_idx = np.nonzero(y == 0)[0]
-                nonzero_idx = np.nonzero(y)[0]  # FIXME should shape be unravelled
-
-                llf = np.zeros_like(y, dtype=np.float64).reshape(-1, 1)  # TODO test this i added ravel to this code
-                llf[zero_idx] = (np.log(w[zero_idx] + (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
-                llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
-                loglik = llf.sum()
 
            loglik = np.clip(loglik, log_lik_min, log_lik_max)
            if self.power_up_ll:
 
                loglik += 2*loglik
+                print('am i powering up')
            penalty = self.regularise_l2(betas)
 
            if not np.isreal(loglik):
@@ -4851,7 +4411,7 @@ class ObjectiveFunction(object):
            else:
                return -loglik + penalty
        # Else, we have draws
-        self.n_obs = len(y) * self.Ndraws
+        self.n_obs = len(y) * self.Ndraws  #todo is this problematic
        penalty += self._penalty_betas(
            betas, dispersion, penalty, float(len(y) / 10.0))
 
@@ -4908,7 +4468,7 @@ class ObjectiveFunction(object):
        # if (Kchol +Kr) != (len(betas) -Kf-Kr -self.is_dispersion(dispersion)):
        #     print('I think this is fine')
        n_coeff = self.get_param_num(dispersion)
-        Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh
+        Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh = self.get_num_params()
        if Kchol_a != Kchol:
            print('hold')
 
@@ -5905,7 +5465,7 @@ class ObjectiveFunction(object):
            b[-1] = .5
        if method == 'L-BFGS-B' or method2 == 'L-BFGS-B':
 
-            Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh
+            Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh= self.get_num_params()
            if Kh > 0:
                Kh_e = mod.get('XH').shape[-1]
                Kh_range = Kh - Kh_e
@@ -6024,14 +5584,14 @@ class ObjectiveFunction(object):
                    mod['dispersion_penalty'] = np.abs(b[-1])
                grad_args = (
                    X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0, self.rdm_cor_fit,
-
+                    None, exog_infl, draws_grouped, XG, mod)
                # self.gradients_est_yes = (1, 1)
 
                if draws is None and draws_hetro is not None:
                    print('hold')
                betas_est = self._minimize(self._loglik_gradient, b, args=(
                    X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
-                    self.rdm_cor_fit,
+                    self.rdm_cor_fit, None, exog_infl, draws_grouped, XG, mod),
                    method=method2, tol=tol['ftol'],
                    options={'gtol': tol['gtol']}, bounds=bounds,
                    hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
@@ -6050,7 +5610,7 @@ class ObjectiveFunction(object):
                betas_est = self._minimize(self._loglik_gradient, b, args=(
                    X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0,
                    self.rdm_cor_fit,
-
+                    None, exog_infl, draws_grouped, XG, mod),
                    method=method2, tol=tol['ftol'],
                    options={'gtol': tol['gtol']})
 
@@ -6059,7 +5619,7 @@ class ObjectiveFunction(object):
 
        if np.isfinite(betas_est['fun']):
            self.naming_for_printing(
-                betas_est['x'], 0, dispersion,
+                betas_est['x'], 0, dispersion, model_nature=mod)
 
        if method2 == 'L-BFGS-B':
 
@@ -6226,8 +5786,7 @@ class ObjectiveFunction(object):
        self.rdm_cor_fit = [x for x, y in zip(
            select_data, model_nature.get('alpha_cor_rdm')) if y == 1]
 
-
-        # self.zi_fit = [x for x, y in zip(select_data, model_nature.get('exog_infl')) if y == 1]
+
        # if alpha_grouped is not None:
        self.grouped_rpm = [x for x, y in zip(select_data, model_nature.get('alpha_grouped')) if y == 1]
        self.hetro_fit = [x for x, y in zip(select_data, model_nature.get('alpha_hetro')) if y == 1]
@@ -6405,14 +5964,14 @@ class ObjectiveFunction(object):
                df_test[:, :, idx], model_nature.get('transformations')[idx] = self.transformer(
                    t, idx, df_test[:, :, idx])
                if np.max(df_tf[:, :, idx]) >= 77000:
+
                    raise Exception('should not be possible')
 
        self.define_selfs_fixed_rdm_cor(model_nature)
        indices = self.get_named_indices(self.fixed_fit)
        indices5 = self.get_named_indices(self.hetro_fit)
 
-
-        model_nature['exog_inflX'] = df_tf[:, :, self.get_named_indices(self.zi_force_names)]
+
 
        x_h_storage = []
        x_h_storage_test = []
@@ -6445,7 +6004,7 @@ class ObjectiveFunction(object):
        if XG is not None:
            indices4_test = np.repeat(self.get_named_indices(self.grouped_rpm),
                                      self.group_dummies_test.shape[2]) if self.grouped_rpm != [] else []
-            XGtest = np.tile(self.group_dummies_test, len(self.grouped_rpm)) * df_test[:, :,
+            XGtest = np.tile(self.group_dummies_test, len(self.grouped_rpm)) * df_test[:, :, indices4_test]
            model_nature['XG'] = XG
            model_nature['XGtest'] = XGtest
 
@@ -6488,7 +6047,8 @@ class ObjectiveFunction(object):
            Xr_test = None
            model_nature['Xr_test'] = Xr_test
        if (Xr.ndim <= 1) or (Xr.shape[0] <= 11) or np.isin(Xr, [np.inf, -np.inf, None, np.nan]).any():
-            print('
+            print('Not Possible')
+            raise Exception
        if Xr.size == 0:
            Xr = None
            Xr_test = None
@@ -6509,10 +6069,10 @@ class ObjectiveFunction(object):
            obj_1.add_layout(layout)
 
            model_form_name = self.check_complexity(
-                self.fixed_fit, self.rdm_fit, self.rdm_cor_fit,
+                self.fixed_fit, self.rdm_fit, self.rdm_cor_fit, None, dispersion, is_halton, model_nature)
 
            obj_1.add_names(self.fixed_fit.copy(), self.rdm_fit.copy(),
-                            self.rdm_cor_fit.copy(), model_form_name,
+                            self.rdm_cor_fit.copy(), model_form_name, None, pvalues)
            if not isinstance(obj_1, dict):
                raise Exception('should not be possible')
 
@@ -6540,7 +6100,7 @@ class ObjectiveFunction(object):
        else:
            obj_1 = Solution()
            self.significant = 3
-            print('
+            print('not_implemented yet')  #TODO check this for exciddeing values
 
        if self.is_quanitifiable_num(obj_1[self._obj_1]) and pvalues is not None:
            self.bic = obj_1['bic']
@@ -6563,8 +6123,9 @@ class ObjectiveFunction(object):
                if self.significant == 0:
 
                    print(self.full_model, 'full model is')
-
-
+                    if not self.test_flag:
+                        alpha, alpha_rdm, alpha_cor_rdm = self.modify(
+                            self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
 
                    return obj_1, model_nature
 
@@ -6581,8 +6142,9 @@ class ObjectiveFunction(object):
                    self.significant = 3
 
                    return obj_1, model_nature
-
-
+                if not self.test_flag:
+                    alpha, alpha_rdm, alpha_cor_rdm = self.modify(
+                        self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
                if self.grab_transforms:
 
                    if is_halton and self.significant == 1:
@@ -6692,7 +6254,7 @@ class ObjectiveFunction(object):
        alpha_cor_rdm = np.in1d(select_data, cor_rdm) * 1
        alpha_cor_rdm = alpha_cor_rdm.tolist()
        alpha_group_rdm = np.in1d(select_data, group_rdm) * 1
-        alpha_group_rdm = alpha_group_rdm.tolist()
+        alpha_group_rdm = alpha_group_rdm.tolist()  #todo will this ever trigger
        return alpha, alpha_rdm, alpha_cor_rdm
 
    def show_transforms(self, fix, rdm):
@@ -6825,9 +6387,10 @@ class ObjectiveFunction(object):
 
        Br_come_one = self.Br.copy()
        # betas_random = self._transform_rand_betas(betas, betas_std, draws)
-
+        #todo make sure this works for ln and truncated normal
        if any(set(distribution).intersection(['ln_normal', 'tn_normal'])):
-
+            print('check this, intesection shouldn not happen for all')
+            Br_come_one = self._transform_rand_betas(betas, betas_std, draws)
        for k, dist_k in enumerate(distribution):
            if dist_k == 'ln_normal':
                der[:, k, :] = Br_come_one[:, k, :]
@@ -6837,9 +6400,7 @@ class ObjectiveFunction(object):
        return der
 
    def _copy_size_display_as_ones(self, matrix):
-
-        please = matrix.shape
-        der = dev.np.ones((please), dtype=matrix.dtype)
+        der = dev.np.ones(matrix.shape, dtype=matrix.dtype)
        return der
 
    def prepare_halton(self, dim, n_sample, draws, distribution, long=False, slice_this_way=None):