metacountregressor 0.1.120__py3-none-any.whl → 0.1.122__py3-none-any.whl
- metacountregressor/helperprocess.py +71 -0
- metacountregressor/main.py +113 -53
- metacountregressor/metaheuristics.py +1 -1
- metacountregressor/solution.py +126 -34
- {metacountregressor-0.1.120.dist-info → metacountregressor-0.1.122.dist-info}/METADATA +1 -1
- {metacountregressor-0.1.120.dist-info → metacountregressor-0.1.122.dist-info}/RECORD +9 -9
- {metacountregressor-0.1.120.dist-info → metacountregressor-0.1.122.dist-info}/LICENSE.txt +0 -0
- {metacountregressor-0.1.120.dist-info → metacountregressor-0.1.122.dist-info}/WHEEL +0 -0
- {metacountregressor-0.1.120.dist-info → metacountregressor-0.1.122.dist-info}/top_level.txt +0 -0
metacountregressor/helperprocess.py
CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
 import csv
 import matplotlib.pyplot as plt
 
+
 plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-dark.mplstyle')
 
 ##Select the best Features Based on RF
@@ -151,6 +152,74 @@ def remove_files(yes=1):
         os.remove('pop_log.csv')
 
 
+# Function to process the DataFrame
+'''
+Example usage
+# Configuration dictionary
+config = {
+    'Age': {
+        'type': 'bin',
+        'bins': [0, 18, 35, 50, 100],
+        'labels': ['Child', 'YoungAdult', 'MiddleAged', 'Senior'],
+        'prefix': 'Age_Binned'
+    },
+    'Income': {
+        'type': 'bin',
+        'bins': [0, 2000, 5000, 10000],
+        'labels': ['Low', 'Medium', 'High'],
+        'prefix': 'Income_Binned'
+    },
+    'Gender': {
+        'type': 'one-hot',
+        'prefix': 'Gender'
+    },
+    'Score': {
+        'type': 'none'
+    }
+}
+'''
+
+
+def transform_dataframe(df, config):
+    output_df = pd.DataFrame()
+
+    for column, settings in config.items():
+        if settings['type'] == 'bin':
+            # Apply binning
+            binned = pd.cut(
+                df[column],
+                bins=settings['bins'],
+                labels=settings['labels'],
+                right=False
+            )
+            # One-hot encode the binned column
+            binned_dummies = pd.get_dummies(binned, prefix=settings['prefix'])
+            output_df = pd.concat([output_df, binned_dummies], axis=1)
+
+        elif settings['type'] == 'one-hot':
+            # One-hot encode the column
+            one_hot_dummies = pd.get_dummies(df[column], prefix=settings.get('prefix', column))
+            output_df = pd.concat([output_df, one_hot_dummies], axis=1)
+
+        elif settings['type'] == 'continuous':
+            # Apply function to continuous data
+            data = df[column]
+            if 'bounds' in settings:
+                # Apply bounds filtering
+                lower, upper = settings['bounds']
+                data = data[(data >= lower) & (data <= upper)]
+            if 'apply_func' in settings:
+                # Apply custom function
+                data = data.apply(settings['apply_func'])
+            output_df[column] = data
+
+        elif settings['type'] == 'none':
+            # Leave the column unchanged
+            output_df = pd.concat([output_df, df[[column]]], axis=1)
+
+    return output_df
+
+
 def as_wide_factor(x_df, yes=1, min_factor=2, max_factor=8, keep_original=0, exclude=[]):
     if not yes:
         return x_df
@@ -330,3 +399,5 @@ def entries_to_remove(entries, the_dict):
     for key in entries:
         if key in the_dict:
             del the_dict[key]
+
+
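The transform_dataframe helper added above is driven entirely by its config dictionary (the docstring shows the full schema). A minimal usage sketch, with an invented three-row DataFrame purely for illustration:

import pandas as pd

df = pd.DataFrame({
    'Age': [12, 40, 70],
    'Income': [1500, 4000, 9000],
    'Gender': ['M', 'F', 'M'],
    'Score': [0.2, 0.5, 0.9],
})

config = {
    'Age': {'type': 'bin', 'bins': [0, 18, 35, 50, 100],
            'labels': ['Child', 'YoungAdult', 'MiddleAged', 'Senior'],
            'prefix': 'Age_Binned'},
    'Gender': {'type': 'one-hot', 'prefix': 'Gender'},
    'Score': {'type': 'none'},
}

out = transform_dataframe(df, config)
# out holds the Age_Binned_* and Gender_* dummy columns plus Score unchanged.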
metacountregressor/main.py
CHANGED
@@ -28,12 +28,60 @@ def convert_df_columns_to_binary_and_wide(df):
     return df
 
 
-def process_arguments():
+def process_arguments(**kwargs):
     '''
     TRYING TO TURN THE CSV FILES INTO RELEVANT ARGS
     '''
-
-
+    # dataset
+    if kwargs.get('dataset_file', False
+                  ):
+        dataset = pd.read_csv(kwargs.get('dataset_file'))
+        named_data_headers = dataset.columns.tolist()
+        decision_constants = {name: list(range(7)) for name in named_data_headers}
+        data_info = {
+
+
+            'AADT': {
+                'type': 'continuous',
+                'bounds': [0.0, np.infty],
+                'discrete': False,
+                'apply_func': (lambda x: np.log(x + 1)),
+            },
+            'SPEED': {
+                'type': 'continuous',
+                'bounds': [0, 100],
+                'enforce_bounds': True,
+                'discrete': True
+            },
+            'TIME': {
+                'type': 'continuous',
+                'bounds': [0, 23.999],
+                'discrete': False
+            }
+        }
+        # remove ID columns from dataset
+        dataset = dataset.drop(columns=[
+            'ID'
+        ])
+        for c in dataset.columns:
+            if c not in data_info.keys():
+                data_info[c] = {'type': 'categorical'}
+
+        data_new = helperprocess.transform_dataframe(dataset, data_info)
+
+    update_constant = kwargs.get('analyst_constraints')
+    # update the decision_constraints
+
+    data_characteristic = pd.read_csv(kwargs.get('problem_data', 'problem_data.csv'))
+    # Extract the column as a list of characteristic names
+    name_data_characteristics = data_characteristic.columns.tolist()
+
+    # Create the dictionary
+    decision_constraints = {name: list(range(7)) for name in name_data_characteristics}
+
+    print('this gets all the features, I need to remove...')
+
+    analyst_d = pd.read_csv(kwargs.get('decison_constraints', 'decisions.csv'))
     hyper = pd.read_csv('setup_hyper.csv')
 
     new_data = {'data': data_characteristic,
@@ -41,7 +89,7 @@ def process_arguments():
                 'hyper': hyper}
     return new_data
 
-def
+def process_package_arguments():
 
     new_data = {}
     pass
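process_arguments now pulls everything from keyword arguments, falling back to problem_data.csv and decisions.csv when keys are absent. A hypothetical invocation, assuming those CSV files exist on disk (note the kwargs key 'decison_constraints' is spelled that way in the source):

new_data = process_arguments(
    dataset_file='data/Ex-16-3.csv',      # triggers the dataset branch above
    problem_data='problem_data.csv',
    decison_constraints='decisions.csv',
)
data_characteristic = new_data['data']
hyper = new_data['hyper']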
@@ -319,8 +367,8 @@ def main(args, **kwargs):
         x_df = helperprocess.interactions(x_df, keep)
 
 
-
-        data_info = process_arguments()
+    elif dataset == 10:  # the dataset has been selected in the program as something else
+        data_info = process_arguments(**args)
         data_info['hyper']
         data_info['analyst']
         data_info['data']['Y']
@@ -339,6 +387,10 @@ def main(args, **kwargs):
         y_df = df[[data_info['data']['Y'][0]]]
         y_df.rename(columns={data_info['data']['Y'][0]: "Y"}, inplace=True)
         print('test')  # FIXME
+    else:
+        print('PROCESS THE PACKAGE ARGUMENTS SIMILAR TO HOW ONE WOULD DEFINE THE ENVIRONMENT')
+        data_info = process_package_arguments()
+
 
     if args['Keep_Fit'] == str(2) or args['Keep_Fit'] == 2:
         if manual_fit_spec is None:
@@ -444,55 +496,63 @@ if __name__ == '__main__':
     parser = argparse.ArgumentParser(prog='main',
                                      epilog=main.__doc__,
                                      formatter_class=argparse.RawDescriptionHelpFormatter, conflict_handler='resolve')
-
-
-
-
-    if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    parser.
-
-
-
-
-
-
-
-
-
-
-
+
+
+    BATCH_JOB = True
+
+    if BATCH_JOB:
+        parser.add_argument('-dataset_file', default='data/Ex-16-3.csv', help='supply the path to the dataset')
+
+        parser.add_argument('-line', type=int, default=1,
+                            help='line to read in csv to pass in argument')
+
+        if vars(parser.parse_args())['line'] is not None:
+            reader = csv.DictReader(open('set_data.csv', 'r'))
+            args = list()
+            line_number_obs = 0
+            for dictionary in reader:  # TODO find a way to handle multiple args
+                args = dictionary
+                if line_number_obs == int(vars(parser.parse_args())['line']):
+                    break
+                line_number_obs += 1
+            args = dict(args)
+
+
+            for key, value in args.items():
+                try:
+                    # Attempt to parse the string value to a Python literal if value is a string.
+                    if isinstance(value, str):
+                        value = ast.literal_eval(value)
+                except (ValueError, SyntaxError):
+                    # If there's a parsing error, value remains as the original string.
+                    pass
+
+                # Add the argument to the parser with the potentially updated value.
+                parser.add_argument(f'-{key}', default=value)
+
+            for i, action in enumerate(parser._optionals._actions):
+                if "-algorithm" in action.option_strings:
+                    parser._optionals._actions[i].help = "optimization algorithm"
+
+            override = True
+            if override:
+                print('WARNING: TESTING ENVIRONMENT, TURN OFF FOR RELEASE')
+                parser.add_argument('-problem_number', default='10')
+
+            if 'algorithm' not in args:
+                parser.add_argument('-algorithm', type=str, default='hs',
+                                    help='optimization algorithm')
+            elif 'Manual_Fit' not in args:
+                parser.add_argument('-Manual_Fit', action='store_false', default=None,
+                                    help='To fit a model manually if desired.')
+
+            parser.add_argument('-seperate_out_factors', action='store_false', default=False,
+                                help='True if wanting to split data that is potentially categorical as binary'
+                                     ' we want to split the data for processing')
+            parser.add_argument('-supply_csv', type=str, help='enter the name of the csv, please include it as a full directory path')
 
         else:  # DIDN'T SPECIFY LINES, TRY EACH ONE MANUALLY
+            print("RUNNING WITH ARGS")
             parser.add_argument('-com', type=str, default='MetaCode',
                                 help='line to read csv')
 
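The batch-job path above reads one row of set_data.csv and coerces each CSV string through ast.literal_eval before registering it as an argparse default. The coercion step in isolation (the sample values are illustrative, not from the package):

import ast

raw = {'line': '1', 'algorithm': 'hs', 'bounds': '[0, 100]', 'com': 'MetaCode'}
parsed = {}
for key, value in raw.items():
    try:
        parsed[key] = ast.literal_eval(value)   # '1' -> 1, '[0, 100]' -> [0, 100]
    except (ValueError, SyntaxError):
        parsed[key] = value                     # 'hs' and 'MetaCode' stay strings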
metacountregressor/solution.py
CHANGED
@@ -152,7 +152,7 @@ class ObjectiveFunction(object):
         self.dist_fit = None
 
         self.MAE = None
-        self.best_obj_1 =
+        self.best_obj_1 = 1000000.0
         self._obj_1 = 'bic'
         self._obj_2 = 'MSE'
         self.numerical_hessian_calc = 0  # calculates hessian by statsmodels otherwise scipy
@@ -395,7 +395,7 @@ class ObjectiveFunction(object):
 
 
 
-        self.Ndraws =
+        self.Ndraws = 200  # todo: change back
         self.draws1 = None
         self.initial_sig = 1  # pass the test of a single model
         self.pvalue_sig_value = .1
@@ -449,8 +449,17 @@ class ObjectiveFunction(object):
         if 'model_types' in kwargs:
             model_types = kwargs['model_types']
         else:
-
+            print('the type of models possible are:')
 
+            model_types = [[0, 1]]  # add 2 for Generalized Poisson
+            model_types = [[0]]
+            # TODO change back and fix NB
+        model_t_dict = {'Poisson': 0,
+                        "NB": 1}
+        # Retrieve the keys (model names) corresponding to the values in model_types
+        model_keys = [key for key, value in model_t_dict.items() if value in model_types[0]]
+        # Print the formatted result
+        print(f'The type of models possible are: {", ".join(model_keys)}')
         self._discrete_values = self._discrete_values + self.define_poissible_transforms(
             self._transformations, kwargs.get('decisions', None)) + model_types
 
@@ -470,6 +479,7 @@ class ObjectiveFunction(object):
         # Manually fit from analyst specification
         manual_fit = kwargs.get('Manual_Fit')
         if manual_fit is not None:
+            print('fitting manual')
             self.process_manual_fit(manual_fit)
 
         self.solution_analyst = None
@@ -1372,7 +1382,7 @@ class ObjectiveFunction(object):
         bb = eVy - 1
         disp = sm.OLS(ab.ravel(), bb.ravel()).fit()
         gamma = disp.params[0]
-        print(f'dispersion is {gamma}')
+        # print(f'dispersion is {gamma}')
         return gamma
 
     def validation(self, betas, y, X, Xr=None, dispersion=0, rdm_cor_fit=None, zi_list=None, exog_infl=None,
@@ -2321,7 +2331,7 @@ class ObjectiveFunction(object):
         sorted(my_dict, key=lambda x: x[0]['pval_percentage'])
 
     def get_fitness(self, vector, multi=False, verbose=False, max_routine=3):
-        obj_1 = 10.0 **
+        obj_1 = 10.0 ** 5
         obj_best = None
         sub_slns = list()
 
@@ -2332,7 +2342,7 @@ class ObjectiveFunction(object):
         try:
             self.repair(vector)
         except Exception as e:
-            print('
+            print('problem repairing here')
             print(vector)
             print(e)
             layout = vector.copy()
@@ -2613,7 +2623,7 @@ class ObjectiveFunction(object):
             self._hmcr = (
                 self._hmcr_min + ((self._hmcr_max - self._hmcr_min) / self._max_imp) * iteration)
 
-
+
 
     def update_par(self, iteration, is_sin=False):
         """
@@ -2833,10 +2843,6 @@ class ObjectiveFunction(object):
         '''
         # return score
 
-
-
-
-
         try:
             if alpha is None:
                 alpha = np.exp(params[-1])
@@ -3467,6 +3473,8 @@ class ObjectiveFunction(object):
             corr_pairs = list(itertools.combinations(self.Kr, 2))
         else:
             corr_pairs = list(itertools.combinations(corr_indices, 2))
+        if len(corr_pairs) > 0:
+            print('maybe get the terms here')
 
         for ii, corr_pair in enumerate(corr_pairs):
             # lower cholesky matrix
@@ -3495,7 +3503,7 @@ class ObjectiveFunction(object):
         a = 0
         b = 0
         stuff = []
-        #
+        # TODO get order
         for j, i in enumerate(list_sizes):
             br_mean = betas_hetro[a:i + a]
             a += i
@@ -3522,7 +3530,30 @@ class ObjectiveFunction(object):
         br_mean = betas_m
         br_sd = betas_sd  # Last Kr positions
         # Compute: betas = mean + sd*draws
-
+        if len(br_sd) != draws.shape[1]:
+            # get the same size as the mean
+            betas_random = self.Br.copy()
+
+            '''
+            c = self.get_num_params()[3:5]
+
+            cor = []
+            for i in range(c[0]):
+                cor.append(i)
+
+            vall = []
+            for i, val in enumerate(reversed(br_sd)):
+                vall.append()
+
+            remaining = draws.shape[1] - len(betas_sd)
+            '''
+
+        else:
+
+
+            betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
+
+
         betas_random = self._apply_distribution(betas_random)
 
         return betas_random
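The else branch above is the standard simulated-parameters construction, betas = mean + sd * draws, broadcast over observations and draws. A standalone numpy sketch of that broadcasting (the shapes are illustrative):

import numpy as np

N, Kr, R = 4, 3, 100                      # observations, random coefficients, draws
br_mean = np.array([0.5, -1.0, 2.0])
br_sd = np.array([0.1, 0.3, 0.2])
draws = np.random.normal(size=(N, Kr, R))

# (1,Kr,1) + (N,Kr,R) * (1,Kr,1) broadcasts to (N,Kr,R)
betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
assert betas_random.shape == (N, Kr, R)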
@@ -3959,7 +3990,7 @@ class ObjectiveFunction(object):
                 # proba_r = self.poisson_lognormal_pmf(y, eVd, sig)
                 proba_r = np.array(store)
                 proba_r = np.atleast_2d(proba_r).T
-
+
 
             else:
                 raise Exception('not implemented other modeling forms')
@@ -4137,12 +4168,13 @@ class ObjectiveFunction(object):
                     br, draws_, brstd, dis_fit_long)  # (N,K,R)
                 dprod_r = dev.np.einsum("njk,njr -> nkr", Xdr,
                                         einsum_model_form, dtype=np.float64)  # (N,K,R)
-                der_prod_r = dprod_r * der * proba_n[:, None, :]  # (N,K,R)
+                # der_prod_r = dprod_r * der * proba_n[:, None, :]  # (N,K,R)
                 # der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :]  # I think this is the case check
-
-                #
+
+                der_prod_r = dprod_r * der * proba_n[:, None, :]  # or this one
+
                 der_t = self._compute_derivatives(
-                    br, draws_[:, draws_tril_idx, :], brstd, self.dist_fit)  # (N,K,R)
+                    br[draws_tril_idx], draws_[:, draws_tril_idx, :], brstd, np.array(self.dist_fit)[draws_tril_idx])  # (N,K,R)
                 # er_t = self._compute_derivatives(br, draws_, brstd[:, draws_tril_idx,: ], self.dist_fit, draws_tril_idx)
                 der_prod_r_t = dprod_r[:, draws_tril_idx, :] * \
                     der_t * proba_n[:, None, :]  # (N,K,R)
@@ -4209,12 +4241,12 @@ class ObjectiveFunction(object):
         else:
             grad_n = self._concat_gradients(
                 (gr_f, gr_u, gr_s, gr_h, gr_hs, gr_d[:, None])) / Rlik  # (N,K)
-        grad_n = np.nan_to_num(grad_n, nan=0, posinf=
-        grad_n = np.clip(grad_n, -
+        grad_n = np.nan_to_num(grad_n, nan=0, posinf=1000, neginf=-1000)
+        grad_n = np.clip(grad_n, -100, 100)
         n = np.shape(grad_n)[0]
         # subtract out mean gradient value
-
-
+        grad_n_sub = grad_n - (np.sum(grad_n, axis=0) / n)
+        grad_n = grad_n_sub
         grad = grad_n.sum(axis=0)
         return grad, grad_n
 
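The completed lines bound each observation's gradient contribution and then centre it around the sample mean. The same three-step stabilisation in isolation (toy values):

import numpy as np

grad_n = np.array([[np.inf, 0.5], [-np.inf, np.nan], [2.0, -3.0]])

grad_n = np.nan_to_num(grad_n, nan=0, posinf=1000, neginf=-1000)
grad_n = np.clip(grad_n, -100, 100)            # bound each observation's contribution
n = np.shape(grad_n)[0]
grad_n = grad_n - np.sum(grad_n, axis=0) / n   # subtract out the mean gradient
grad = grad_n.sum(axis=0)                      # sums to ~0 once grad_n is centred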
@@ -4574,7 +4606,7 @@ class ObjectiveFunction(object):
         penalty = self.regularise_l2(betas)
 
         if not np.isreal(loglik):
-            loglik = -
+            loglik = - 10000000.0
 
         output = (-loglik + penalty,)
         if return_gradient:
@@ -4817,7 +4849,7 @@ class ObjectiveFunction(object):
             proba.append(dev.to_cpu(proba_))
 
         lik = np.stack(proba).sum(axis=0) / R  # (N, )
-        lik = np.clip(lik, min_comp_val,
+        lik = np.clip(lik, min_comp_val, 1000)
         # lik = np.nan_to_num(lik, )
         loglik = np.log(lik)
         llf_main = loglik
@@ -5435,7 +5467,7 @@ class ObjectiveFunction(object):
 
 
         sol = Solution()
-
+
         tol = {'ftol': 1e-8, 'gtol': 1e-6}
         is_delete = 0
         dispersion = mod.get('dispersion')
@@ -5793,7 +5825,7 @@ class ObjectiveFunction(object):
             initial_fit_beta = betas_est.x
             parmas = np.append(initial_fit_beta, nb_parma)
             self.nb_parma = nb_parma
-            print(f'neg binomi,{self.nb_parma}')
+            # print(f'neg binomi,{self.nb_parma}')
             betas_est = self._minimize(self._loglik_gradient, initial_fit_beta, args=(
                 X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
                 self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
@@ -5801,7 +5833,7 @@ class ObjectiveFunction(object):
                 options={'gtol': tol['gtol']}, bounds=bounds,
                 hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
 
-            print('refit with estimation of NB')
+            # print('refit with estimation of NB')
             # self.numerical_hessian_calc = True
             if self.numerical_hessian_calc:
                 try:
@@ -6184,6 +6216,7 @@ class ObjectiveFunction(object):
                 df_test[:, :, idx], model_nature.get('transformations')[idx] = self.transformer(
                     t, idx, df_test[:, :, idx])
                 if np.max(df_tf[:, :, idx]) >= 77000:
+                    # TODO need to normalise the data
 
                     print('should not be possible')
 
@@ -6242,7 +6275,7 @@ class ObjectiveFunction(object):
         model_nature['XH'] = XH
         X_test = None
         if np.isin(X, [np.inf, -np.inf, None, np.nan]).any():  # type ignore
-            raise Exception('there is some kind of error')
+            raise Exception('there is some kind of error in X')
 
         # numpy data setup for estimation
         indices2 = self.get_named_indices(self.rdm_fit)
@@ -6393,6 +6426,53 @@ class ObjectiveFunction(object):
 
         return obj_1, model_nature
 
+    def get_X_tril(self):
+        '''For correlations find the repeating terms'''
+        varnames = self.none_join([self.rdm_grouped_fit, self.rdm_fit, self.rdm_cor_fit])
+        rv_count_all = 0
+        chol_count = 0
+        rv_count = 0
+        corr_indices = []
+        rv_indices = []
+        for ii, var in enumerate(varnames):  # TODO: BUGFIX
+            if var in self.none_handler(self.rdm_cor_fit):
+                is_correlated = True
+            else:
+                is_correlated = False
+
+            rv_count_all += 1
+            if is_correlated:
+                chol_count += 1
+            else:
+                rv_count += 1
+
+            if var in self.none_handler(self.rdm_cor_fit):
+
+                corr_indices.append(rv_count_all - 1)  # TODO: what does this do
+
+            else:
+                rv_indices.append(rv_count_all - 1)
+
+        # for s.d.: gr_w = (Obs prob. minus predicted probability) * obs. var * random draw
+        draws_tril_idx = np.array([corr_indices[j]
+                                   for i in range(len(self.none_handler(self.rdm_cor_fit)))
+                                   for j in range(i + 1)])  # varnames pos.
+        X_tril_idx = np.array([corr_indices[i]
+                               for i in range(len(self.none_handler(self.rdm_cor_fit)))
+                               for j in range(i + 1)])
+        # Find the s.d. for random variables that are not correlated
+        var_uncor = self.none_join([self.rdm_grouped_fit, self.rdm_fit])
+        range_var = [x for x in
+                     range(len(self.none_handler(var_uncor)))]
+        range_var = sorted(range_var)
+        draws_tril_idx = np.array(np.concatenate((range_var, draws_tril_idx)))
+        X_tril_idx = np.array(np.concatenate((range_var, X_tril_idx)))
+        draws_tril_idx = draws_tril_idx.astype(int)
+        X_tril_idx = X_tril_idx.astype(int)
+        return X_tril_idx
+
+
+
     def modifyn(self, data):
         select_data = self._characteristics_names
         alpha = np.isin(select_data, [item.split(':')[0] for item in data['fixed_fit']]).astype(int).tolist()
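The new get_X_tril method builds paired index arrays that walk the lower triangle of the correlated-coefficient (Cholesky) block, column-wise for the draws and row-wise for X. A standalone sketch with three correlated terms (the positions are illustrative):

import numpy as np

corr_indices = [2, 3, 4]        # positions of the correlated random terms
K = len(corr_indices)

draws_tril_idx = np.array([corr_indices[j] for i in range(K) for j in range(i + 1)])
X_tril_idx = np.array([corr_indices[i] for i in range(K) for j in range(i + 1)])
# draws_tril_idx -> [2, 2, 3, 2, 3, 4]   (column index of each Cholesky entry)
# X_tril_idx     -> [2, 3, 3, 4, 4, 4]   (row index of each Cholesky entry)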
@@ -6600,23 +6680,35 @@ class ObjectiveFunction(object):
         # N, D = draws.shape[0], draws.shape[1]
         N, R, Kr = draws.shape[0], draws.shape[2], draws.shape[1]
         der = dev.np.ones((N, Kr, R), dtype=draws.dtype)
-        if len(self.none_handler(self.rdm_cor_fit)) == 0:
-            Br_come_one = self.Br.copy()
-            # Br_come_one =
-        else:
 
-            Br_come_one = self.Br.copy()
         # betas_random = self._transform_rand_betas(betas, betas_std, draws)
         # todo make sure this works for ln and truncated normal
         if any(set(distribution).intersection(['ln_normal', 'tn_normal'])):
-
+
+            # print('check this, intersection should not happen for all')
+
+            if der.shape[1] != draws.shape[1]:
+                print('why')
             Br_come_one = self._transform_rand_betas(betas, betas_std, draws)
+            if der.shape[1] != draws.shape[1]:
+                print('why')
+            # TODO need to get the structure of the rdms
             for k, dist_k in enumerate(distribution):
                 if dist_k == 'ln_normal':
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
                     der[:, k, :] = Br_come_one[:, k, :]
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
                 elif dist_k == 'tn_normal':
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
                     der[:, k, :] = 1 * (Br_come_one[:, k, :] > 0)
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
 
+        if der.shape[1] != draws.shape[1]:
+            print('why')
         return der
 
     def _copy_size_display_as_ones(self, matrix):
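In the hunk above, der holds the derivative of each transformed coefficient with respect to its underlying normal draw: the coefficient itself for a log-normal (since d/dx exp(x) = exp(x)) and a positivity indicator for a truncated normal. The core loop in isolation, with the repeated shape guards stripped out (the shapes are illustrative):

import numpy as np

N, Kr, R = 2, 2, 5
Br = np.random.normal(size=(N, Kr, R))    # stand-in for the transformed betas
distribution = ['ln_normal', 'tn_normal']

der = np.ones((N, Kr, R))
for k, dist_k in enumerate(distribution):
    if dist_k == 'ln_normal':
        der[:, k, :] = Br[:, k, :]             # derivative equals the beta itself
    elif dist_k == 'tn_normal':
        der[:, k, :] = 1 * (Br[:, k, :] > 0)   # gradient flows only where beta > 0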
{metacountregressor-0.1.120.dist-info → metacountregressor-0.1.122.dist-info}/RECORD
CHANGED
@@ -3,18 +3,18 @@ metacountregressor/_device_cust.py,sha256=759fnKmTYccJm4Lpi9_1reurh6OB9d6q9soPR0
 metacountregressor/app_main.py,sha256=vY3GczTbGbBRalbzMkl_9jVW7RMgEOc6z2Dr1IZJv9c,10014
 metacountregressor/data_split_helper.py,sha256=M2fIMdIO8znUaYhx5wlacRyNWdQjNYu1z1wkE-kFUYU,3373
 metacountregressor/halton.py,sha256=jhovA45UBoZYU9g-hl6Lb2sBIx_ZBTNdPrpgkzR9fng,9463
-metacountregressor/helperprocess.py,sha256=
-metacountregressor/main.py,sha256=
+metacountregressor/helperprocess.py,sha256=mjdcuelR_9MKRSC--RnmgfQWMp9l9fybNZpSyDEWq-A,15016
+metacountregressor/main.py,sha256=rWUs3xY4wH6UBdn6nqyoOPTYDweRrye-ZfNMlBdbuHg,22714
 metacountregressor/main_old.py,sha256=eTS4ygq27MnU-dZ_j983Ucb-D5XfbVF8OJQK2hVVLZc,24123
-metacountregressor/metaheuristics.py,sha256=
+metacountregressor/metaheuristics.py,sha256=Kkx1Jfox6NBlm5zVrI26Vc_NI7NFQSS9dinrZU9SpV8,105871
 metacountregressor/pareto_file.py,sha256=whySaoPAUWYjyI8zo0hwAOa3rFk6SIUlHSpqZiLur0k,23096
 metacountregressor/pareto_logger__plot.py,sha256=mEU2QN4wmsM7t39GJ_XhJ_jjsdl09JOmG0U2jICrAkI,30037
 metacountregressor/setup.py,sha256=8w6IqX0tJsbYrOI1BJLIJCIvOnunKli5I9fsF5PhHv4,919
 metacountregressor/single_objective_finder.py,sha256=jVG7GJBqzSP4_riYr-kMMKy_LE3SlGmKMunNhHYxgRg,8011
-metacountregressor/solution.py,sha256=
+metacountregressor/solution.py,sha256=OJqB00cvGMLFei6RsjphPamOdLm3EWOOzK7k-uVbvFY,277671
 metacountregressor/test_generated_paper2.py,sha256=pwOoRzl1jJIIOUAAvbkT6HmmTQ81mwpsshn9SLdKOg8,3927
-metacountregressor-0.1.
-metacountregressor-0.1.
-metacountregressor-0.1.
-metacountregressor-0.1.
-metacountregressor-0.1.
+metacountregressor-0.1.122.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+metacountregressor-0.1.122.dist-info/METADATA,sha256=FVjcOQD1_WwEKGYaen8netc1hx_Mxx1g71ajF-qVOJQ,23415
+metacountregressor-0.1.122.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+metacountregressor-0.1.122.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
+metacountregressor-0.1.122.dist-info/RECORD,,
{metacountregressor-0.1.120.dist-info → metacountregressor-0.1.122.dist-info}/LICENSE.txt: file without changes
{metacountregressor-0.1.120.dist-info → metacountregressor-0.1.122.dist-info}/WHEEL: file without changes
{metacountregressor-0.1.120.dist-info → metacountregressor-0.1.122.dist-info}/top_level.txt: file without changes