metacountregressor 0.1.118__tar.gz → 0.1.120__tar.gz

Files changed (27)
  1. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/PKG-INFO +1 -1
  2. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/metacountregressor/helperprocess.py +71 -0
  3. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/metacountregressor/main.py +113 -53
  4. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/metacountregressor/metaheuristics.py +1 -1
  5. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/metacountregressor/solution.py +126 -34
  6. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/metacountregressor.egg-info/PKG-INFO +1 -1
  7. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/LICENSE.txt +0 -0
  8. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/README.rst +0 -0
  9. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/metacountregressor/__init__.py +0 -0
  10. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/metacountregressor/_device_cust.py +0 -0
  11. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/metacountregressor/app_main.py +0 -0
  12. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/metacountregressor/data_split_helper.py +0 -0
  13. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/metacountregressor/halton.py +0 -0
  14. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/metacountregressor/main_old.py +0 -0
  15. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/metacountregressor/pareto_file.py +0 -0
  16. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/metacountregressor/pareto_logger__plot.py +0 -0
  17. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/metacountregressor/setup.py +0 -0
  18. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/metacountregressor/single_objective_finder.py +0 -0
  19. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/metacountregressor/test_generated_paper2.py +0 -0
  20. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/metacountregressor.egg-info/SOURCES.txt +0 -0
  21. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/metacountregressor.egg-info/dependency_links.txt +0 -0
  22. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/metacountregressor.egg-info/not-zip-safe +0 -0
  23. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/metacountregressor.egg-info/requires.txt +0 -0
  24. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/metacountregressor.egg-info/top_level.txt +0 -0
  25. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/setup.cfg +0 -0
  26. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/setup.py +0 -0
  27. {metacountregressor-0.1.118 → metacountregressor-0.1.120}/tests/test.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: metacountregressor
- Version: 0.1.118
+ Version: 0.1.120
  Summary: Extensions for a Python package for estimation of count models.
  Home-page: https://github.com/zahern/CountDataEstimation
  Author: Zeke Ahern
metacountregressor/helperprocess.py
@@ -3,6 +3,7 @@ import pandas as pd
 import csv
 import matplotlib.pyplot as plt
 
+
 plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-dark.mplstyle')
 
 ##Select the best Features Based on RF
@@ -151,6 +152,74 @@ def remove_files(yes=1):
     os.remove('pop_log.csv')
 
 
+# Function to process the DataFrame
+'''
+Example usage
+# Configuration dictionary
+config = {
+    'Age': {
+        'type': 'bin',
+        'bins': [0, 18, 35, 50, 100],
+        'labels': ['Child', 'YoungAdult', 'MiddleAged', 'Senior'],
+        'prefix': 'Age_Binned'
+    },
+    'Income': {
+        'type': 'bin',
+        'bins': [0, 2000, 5000, 10000],
+        'labels': ['Low', 'Medium', 'High'],
+        'prefix': 'Income_Binned'
+    },
+    'Gender': {
+        'type': 'one-hot',
+        'prefix': 'Gender'
+    },
+    'Score': {
+        'type': 'none'
+    }
+}
+'''
+
+
+def transform_dataframe(df, config):
+    output_df = pd.DataFrame()
+
+    for column, settings in config.items():
+        if settings['type'] == 'bin':
+            # Apply binning
+            binned = pd.cut(
+                df[column],
+                bins=settings['bins'],
+                labels=settings['labels'],
+                right=False
+            )
+            # One-hot encode the binned column
+            binned_dummies = pd.get_dummies(binned, prefix=settings['prefix'])
+            output_df = pd.concat([output_df, binned_dummies], axis=1)
+
+        elif settings['type'] == 'one-hot':
+            # One-hot encode the column
+            one_hot_dummies = pd.get_dummies(df[column], prefix=settings.get('prefix', column))
+            output_df = pd.concat([output_df, one_hot_dummies], axis=1)
+
+        elif settings['type'] == 'continuous':
+            # Apply function to continuous data
+            data = df[column]
+            if 'bounds' in settings:
+                # Apply bounds filtering
+                lower, upper = settings['bounds']
+                data = data[(data >= lower) & (data <= upper)]
+            if 'apply_func' in settings:
+                # Apply custom function
+                data = data.apply(settings['apply_func'])
+            output_df[column] = data
+
+        elif settings['type'] == 'none':
+            # Leave the column unchanged
+            output_df = pd.concat([output_df, df[[column]]], axis=1)
+
+    return output_df
+
+
 def as_wide_factor(x_df, yes=1, min_factor=2, max_factor=8, keep_original=0, exclude=[]):
     if not yes:
         return x_df
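The new transform_dataframe helper is config-driven: each column name maps to a 'type' of 'bin', 'one-hot', 'continuous', or 'none'. A minimal usage sketch based on the docstring's config (the toy DataFrame and the import path are illustrative, not from the diff):

import pandas as pd
from metacountregressor import helperprocess

df = pd.DataFrame({
    'Age': [12, 40, 70],          # binned, then one-hot encoded
    'Gender': ['M', 'F', 'F'],    # one-hot encoded directly
    'Score': [0.2, 0.5, 0.9],     # passed through unchanged
})
config = {
    'Age': {'type': 'bin', 'bins': [0, 18, 35, 50, 100],
            'labels': ['Child', 'YoungAdult', 'MiddleAged', 'Senior'],
            'prefix': 'Age_Binned'},
    'Gender': {'type': 'one-hot', 'prefix': 'Gender'},
    'Score': {'type': 'none'},
}
wide = helperprocess.transform_dataframe(df, config)
print(wide.columns.tolist())
# ['Age_Binned_Child', 'Age_Binned_YoungAdult', 'Age_Binned_MiddleAged',
#  'Age_Binned_Senior', 'Gender_F', 'Gender_M', 'Score']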
@@ -330,3 +399,5 @@ def entries_to_remove(entries, the_dict):
     for key in entries:
         if key in the_dict:
             del the_dict[key]
+
+
metacountregressor/main.py
@@ -28,12 +28,60 @@ def convert_df_columns_to_binary_and_wide(df):
     return df
 
 
-def process_arguments():
+def process_arguments(**kwargs):
     '''
     TRYING TO TURN THE CSV FILES INTO RELEVANT ARGS
     '''
-    data_characteristic = pd.read_csv('problem_data.csv')
-    analyst_d = pd.read_csv('decisions.csv')
+    # dataset
+    if kwargs.get('dataset_file', False):
+        dataset = pd.read_csv(kwargs.get('dataset_file'))
+        named_data_headers = dataset.columns.tolist()
+        decision_constants = {name: list(range(7)) for name in named_data_headers}
+        data_info = {
+            'AADT': {
+                'type': 'continuous',
+                'bounds': [0.0, np.infty],
+                'discrete': False,
+                'apply_func': (lambda x: np.log(x + 1)),
+            },
+            'SPEED': {
+                'type': 'continuous',
+                'bounds': [0, 100],
+                'enforce_bounds': True,
+                'discrete': True
+            },
+            'TIME': {
+                'type': 'continuous',
+                'bounds': [0, 23.999],
+                'discrete': False
+            }
+        }
+        # remove ID columns from the dataset
+        dataset = dataset.drop(columns=['ID'])
+        for c in dataset.columns:
+            if c not in data_info.keys():
+                data_info[c] = {'type': 'categorical'}
+
+        data_new = helperprocess.transform_dataframe(dataset, data_info)
+
+        update_constant = kwargs.get('analyst_constraints')
+        # update the decision constraints
+
+    data_characteristic = pd.read_csv(kwargs.get('problem_data', 'problem_data.csv'))
+    # Extract the column as a list of characteristic names
+    name_data_characteristics = data_characteristic.columns.tolist()
+
+    # Create the dictionary
+    decision_constraints = {name: list(range(7)) for name in name_data_characteristics}
+
+    print('this gets all the features, I need to remove...')
+
+    analyst_d = pd.read_csv(kwargs.get('decison_constraints', 'decisions.csv'))
     hyper = pd.read_csv('setup_hyper.csv')
 
     new_data = {'data': data_characteristic,
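The hard-coded data_info above feeds helperprocess.transform_dataframe: a 'continuous' entry first filters the column to its 'bounds', then applies 'apply_func'. A standalone sketch of that path (the values are invented; note the diff uses np.infty, an older alias of np.inf):

import numpy as np
import pandas as pd

data = pd.Series([0, 150, 30000, -5], name='AADT')
lower, upper = 0.0, np.inf                        # 'bounds' from the AADT entry
data = data[(data >= lower) & (data <= upper)]    # drops the -5 row
data = data.apply(lambda x: np.log(x + 1))        # 'apply_func', safe at zero
print(data.round(2).tolist())                     # [0.0, 5.02, 10.31]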
@@ -41,7 +89,7 @@ def process_arguments():
                 'hyper': hyper}
     return new_data
 
-def process_package_argumemnts():
+def process_package_arguments():
 
     new_data = {}
     pass
@@ -319,8 +367,8 @@ def main(args, **kwargs):
         x_df = helperprocess.interactions(x_df, keep)
 
 
-    else:  # the dataset has been selected in the program as something else
-        data_info = process_arguments()
+    elif dataset == 10:  # the dataset has been selected in the program as something else
+        data_info = process_arguments(**args)
         data_info['hyper']
         data_info['analyst']
         data_info['data']['Y']
@@ -339,6 +387,10 @@ def main(args, **kwargs):
         y_df = df[[data_info['data']['Y'][0]]]
         y_df.rename(columns={data_info['data']['Y'][0]: "Y"}, inplace=True)
         print('test')  # FIXME
+    else:
+        print('PROCESS THE PACKAGE ARGUMENTS SIMILAR TO HOW ONE WOULD DEFINE THE ENVIRONMENT')
+        data_info = process_package_arguments()
+
 
     if args['Keep_Fit'] == str(2) or args['Keep_Fit'] == 2:
         if manual_fit_spec is None:
@@ -444,55 +496,63 @@ if __name__ == '__main__':
     parser = argparse.ArgumentParser(prog='main',
                                      epilog=main.__doc__,
                                      formatter_class=argparse.RawDescriptionHelpFormatter, conflict_handler='resolve')
-
-    parser.add_argument('-line', type=int, default=1,
-                        help='line to read in csv to pass in argument')
-
-    if vars(parser.parse_args())['line'] is not None:
-        reader = csv.DictReader(open('set_data.csv', 'r'))
-        args = list()
-        line_number_obs = 0
-        for dictionary in reader:  # TODO find a way to handle multiple args
-            args = dictionary
-            if line_number_obs == int(vars(parser.parse_args())['line']):
-                break
-            line_number_obs += 1
-        args = dict(args)
-
-        for key, value in args.items():
-            try:
-                # Attempt to parse the string value to a Python literal if value is a string.
-                if isinstance(value, str):
-                    value = ast.literal_eval(value)
-            except (ValueError, SyntaxError):
-                # If there's a parsing error, value remains as the original string.
-                pass
-
-            # Add the argument to the parser with the potentially updated value.
-            parser.add_argument(f'-{key}', default=value)
-
-        for i, action in enumerate(parser._optionals._actions):
-            if "-algorithm" in action.option_strings:
-                parser._optionals._actions[i].help = "optimization algorithm"
-
-        override = False
-        if override:
-            print('WARNING: TESTING ENVIRONMENT, TURN OFF FOR RELEASE')
-            parser.add_argument('-problem_number', default='10')
-
-        if 'algorithm' not in args:
-            parser.add_argument('-algorithm', type=str, default='hs',
-                                help='optimization algorithm')
-        elif 'Manual_Fit' not in args:
-            parser.add_argument('-Manual_Fit', action='store_false', default=None,
-                                help='To fit a model manually if desired.')
-
-        parser.add_argument('-seperate_out_factors', action='store_false', default=False,
-                            help='Trie of wanting to split data that is potentially categorical as binary'
-                                 ' we want to split the data for processing')
-        parser.add_argument('-supply_csv', type=str, help='enter the name of the csv, please include it as a full directorys')
+
+    BATCH_JOB = True
+
+    if BATCH_JOB:
+        parser.add_argument('-dataset_file', default='data/Ex-16-3.csv', help='supply the path to the dataset')
+
+        parser.add_argument('-line', type=int, default=1,
+                            help='line to read in csv to pass in argument')
+
+        if vars(parser.parse_args())['line'] is not None:
+            reader = csv.DictReader(open('set_data.csv', 'r'))
+            args = list()
+            line_number_obs = 0
+            for dictionary in reader:  # TODO find a way to handle multiple args
+                args = dictionary
+                if line_number_obs == int(vars(parser.parse_args())['line']):
+                    break
+                line_number_obs += 1
+            args = dict(args)
+
+            for key, value in args.items():
+                try:
+                    # Attempt to parse the string value to a Python literal if value is a string.
+                    if isinstance(value, str):
+                        value = ast.literal_eval(value)
+                except (ValueError, SyntaxError):
+                    # If there's a parsing error, value remains as the original string.
+                    pass
+
+                # Add the argument to the parser with the potentially updated value.
+                parser.add_argument(f'-{key}', default=value)
+
+            for i, action in enumerate(parser._optionals._actions):
+                if "-algorithm" in action.option_strings:
+                    parser._optionals._actions[i].help = "optimization algorithm"
+
+            override = True
+            if override:
+                print('WARNING: TESTING ENVIRONMENT, TURN OFF FOR RELEASE')
+                parser.add_argument('-problem_number', default='10')
+
+            if 'algorithm' not in args:
+                parser.add_argument('-algorithm', type=str, default='hs',
+                                    help='optimization algorithm')
+            elif 'Manual_Fit' not in args:
+                parser.add_argument('-Manual_Fit', action='store_false', default=None,
+                                    help='To fit a model manually if desired.')
+
+            parser.add_argument('-seperate_out_factors', action='store_false', default=False,
+                                help='True if wanting to split data that is potentially categorical as binary;'
+                                     ' we want to split the data for processing')
+            parser.add_argument('-supply_csv', type=str, help='enter the name of the csv, please include it as a full directory')
 
         else:  # DIDN'T SPECIFY LINES, TRY EACH ONE MANUALLY
+            print("RUNNING WITH ARGS")
            parser.add_argument('-com', type=str, default='MetaCode',
                                help='line to read csv')
 
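In the batch path, one row of set_data.csv (selected with -line) supplies the run's arguments, and ast.literal_eval upgrades each string cell to a Python literal where possible. A minimal sketch of that conversion (the row contents are hypothetical):

import ast

row = {'algorithm': 'hs', '_max_time': '3600', 'Manual_Fit': 'None'}
parsed = {}
for key, value in row.items():
    try:
        value = ast.literal_eval(value)   # '3600' -> 3600, 'None' -> None
    except (ValueError, SyntaxError):
        pass                              # 'hs' stays a plain string
    parsed[key] = value
print(parsed)  # {'algorithm': 'hs', '_max_time': 3600, 'Manual_Fit': None}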
metacountregressor/metaheuristics.py
@@ -72,7 +72,7 @@ def dict_mean(dict_list,
             mean_dict[key] = sum(d[key] for d in dict_list) / len(dict_list)
         return mean_dict
     else:
-
+        mean_dict = {}
         for key in dict_list[0].keys():
             if key in ignore:
                 continue
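The one-line fix initialises mean_dict before the else branch fills it; previously that branch referenced an unbound name. A simplified sketch of the intended behaviour (signature reduced; the real function takes further parameters):

def dict_mean(dict_list, ignore=()):
    # Average each key across a list of dicts, skipping ignored keys.
    mean_dict = {}
    for key in dict_list[0].keys():
        if key in ignore:
            continue
        mean_dict[key] = sum(d[key] for d in dict_list) / len(dict_list)
    return mean_dict

print(dict_mean([{'bic': 10.0, 'n': 1}, {'bic': 14.0, 'n': 3}], ignore=('n',)))
# {'bic': 12.0}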
metacountregressor/solution.py
@@ -152,7 +152,7 @@ class ObjectiveFunction(object):
         self.dist_fit = None
 
         self.MAE = None
-        self.best_obj_1 = 100000000.0
+        self.best_obj_1 = 1000000.0
         self._obj_1 = 'bic'
         self._obj_2 = 'MSE'
         self.numerical_hessian_calc = 0  # calculates hessian by statsmodels otherwise scipy
@@ -395,7 +395,7 @@ class ObjectiveFunction(object):
 
 
 
-        self.Ndraws = 1400  # todo: change back
+        self.Ndraws = 200  # todo: change back
         self.draws1 = None
         self.initial_sig = 1  # pass the test of a single model
         self.pvalue_sig_value = .1
@@ -449,8 +449,17 @@ class ObjectiveFunction(object):
         if 'model_types' in kwargs:
             model_types = kwargs['model_types']
         else:
-            model_types = [[0, 1]]  # add 2 for Generalized Poisson
+            print('the type of models possible are:')
 
+            model_types = [[0, 1]]  # add 2 for Generalized Poisson
+            model_types = [[0]]
+            # TODO change back and fix NB
+            model_t_dict = {'Poisson': 0,
+                            "NB": 1}
+            # Retrieve the keys (model names) corresponding to the values in model_types
+            model_keys = [key for key, value in model_t_dict.items() if value in model_types[0]]
+            # Print the formatted result
+            print(f'The type of models possible are: {", ".join(model_keys)}')
         self._discrete_values = self._discrete_values + self.define_poissible_transforms(
             self._transformations, kwargs.get('decisions', None)) + model_types
 
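The added reverse lookup turns the numeric codes in model_types back into readable names; with the temporary [[0]] override only Poisson is reported:

model_t_dict = {'Poisson': 0, 'NB': 1}
model_types = [[0]]  # hard-coded override above, pending the NB fix

model_keys = [key for key, value in model_t_dict.items() if value in model_types[0]]
print(f'The type of models possible are: {", ".join(model_keys)}')
# The type of models possible are: Poisson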
@@ -470,6 +479,7 @@ class ObjectiveFunction(object):
         # Manually fit from analyst specification
         manual_fit = kwargs.get('Manual_Fit')
         if manual_fit is not None:
+            print('fitting manual')
             self.process_manual_fit(manual_fit)
 
         self.solution_analyst = None
@@ -1372,7 +1382,7 @@ class ObjectiveFunction(object):
         bb = eVy - 1
         disp = sm.OLS(ab.ravel(), bb.ravel()).fit()
         gamma = disp.params[0]
-        print(f'dispersion is {gamma}')
+        # print(f'dispersion is {gamma}')
         return gamma
 
     def validation(self, betas, y, X, Xr=None, dispersion=0, rdm_cor_fit=None, zi_list=None, exog_infl=None,
@@ -2321,7 +2331,7 @@ class ObjectiveFunction(object):
         sorted(my_dict, key=lambda x: x[0]['pval_percentage'])
 
     def get_fitness(self, vector, multi=False, verbose=False, max_routine=3):
-        obj_1 = 10.0 ** 8
+        obj_1 = 10.0 ** 5
         obj_best = None
         sub_slns = list()
 
@@ -2332,7 +2342,7 @@ class ObjectiveFunction(object):
         try:
             self.repair(vector)
         except Exception as e:
-            print('prob here')
+            print('problem repairing here')
             print(vector)
             print(e)
         layout = vector.copy()
@@ -2613,7 +2623,7 @@ class ObjectiveFunction(object):
         self._hmcr = (
             self._hmcr_min + ((self._hmcr_max - self._hmcr_min) / self._max_imp) * iteration)
 
-    # end def
+
 
     def update_par(self, iteration, is_sin=False):
         """
@@ -2833,10 +2843,6 @@ class ObjectiveFunction(object):
         '''
         # return score
 
-
-
-
-
         try:
             if alpha is None:
                 alpha = np.exp(params[-1])
@@ -3467,6 +3473,8 @@ class ObjectiveFunction(object):
             corr_pairs = list(itertools.combinations(self.Kr, 2))
         else:
             corr_pairs = list(itertools.combinations(corr_indices, 2))
+        if len(corr_pairs) > 0:
+            print('maybe get the terms here')
 
         for ii, corr_pair in enumerate(corr_pairs):
             # lower cholesky matrix
@@ -3495,7 +3503,7 @@ class ObjectiveFunction(object):
         a = 0
         b = 0
         stuff = []
-        # todo get order
+        # TODO get order
         for j, i in enumerate(list_sizes):
             br_mean = betas_hetro[a:i + a]
             a += i
@@ -3522,7 +3530,30 @@ class ObjectiveFunction(object):
         br_mean = betas_m
         br_sd = betas_sd  # Last Kr positions
         # Compute: betas = mean + sd*draws
-        betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
+        if len(br_sd) != draws.shape[1]:
+            # get the same size as the mean
+            betas_random = self.Br.copy()
+
+            '''
+            c = self.get_num_params()[3:5]
+
+            cor = []
+            for i in range(c[0]):
+                cor.append(i)
+
+            vall = []
+            for i, val in enumerate(reversed(br_sd)):
+                vall.append()
+
+            remaining = draws.shape[1] - len(betas_sd)
+            '''
+
+        else:
+
+            betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
+
+
         betas_random = self._apply_distribution(betas_random)
 
         return betas_random
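The unchanged else branch is the usual random-parameter construction: with br_mean and br_sd of shape (K,) and draws of shape (N, K, R), broadcasting gives betas_random[n, k, r] = mean_k + sd_k * draw[n, k, r]; the new guard falls back to self.Br when the number of standard deviations no longer matches the draw dimension. A self-contained numpy sketch of just the broadcast (shapes invented):

import numpy as np

N, K, R = 4, 2, 5                       # observations, random params, draws
rng = np.random.default_rng(0)
draws = rng.standard_normal((N, K, R))
br_mean = np.array([1.0, -0.5])         # (K,)
br_sd = np.array([0.2, 0.1])            # (K,)

betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
assert betas_random.shape == (N, K, R)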
@@ -3959,7 +3990,7 @@ class ObjectiveFunction(object):
             # proba_r = self.poisson_lognormal_pmf(y, eVd, sig)
             proba_r = np.array(store)
             proba_r = np.atleast_2d(proba_r).T
-            print(1)
+
 
         else:
             raise Exception('not implemented other modeling forms')
@@ -4137,12 +4168,13 @@ class ObjectiveFunction(object):
             br, draws_, brstd, dis_fit_long)  # (N,K,R)
         dprod_r = dev.np.einsum("njk,njr -> nkr", Xdr,
                                 einsum_model_form, dtype=np.float64)  # (N,K,R)
-        der_prod_r = dprod_r * der * proba_n[:, None, :]  # (N,K,R)
+        # der_prod_r = dprod_r * der * proba_n[:, None, :]  # (N,K,R)
         # der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :]  # I think this is the case check
-        der_prod_r = dprod_r[:, X_tril_idx, :] * der * proba_n[:, None, :]  # or this one
-        # print('which one of these')
+
+        der_prod_r = dprod_r * der * proba_n[:, None, :]  # or this one
+
         der_t = self._compute_derivatives(
-            br, draws_[:, draws_tril_idx, :], brstd, self.dist_fit)  # (N,K,R)
+            br[draws_tril_idx], draws_[:, draws_tril_idx, :], brstd, np.array(self.dist_fit)[draws_tril_idx])  # (N,K,R)
         # er_t = self._compute_derivatives(br, draws_, brstd[:, draws_tril_idx,: ], self.dist_fit, draws_tril_idx)
         der_prod_r_t = dprod_r[:, draws_tril_idx, :] * \
             der_t * proba_n[:, None, :]  # (N,K,R)
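The revised _compute_derivatives call fancy-indexes the means and distribution labels with draws_tril_idx, so both line up with the lower-triangular expansion of the draws. A small sketch of that indexing (the index array here is hypothetical):

import numpy as np

br = np.array([0.5, -1.0, 0.2])               # one mean per random parameter
dist_fit = ['normal', 'normal', 'ln_normal']
draws_tril_idx = np.array([0, 1, 2, 2])       # tril expansion repeats entries

print(br[draws_tril_idx])                     # [ 0.5 -1.   0.2  0.2]
print(np.array(dist_fit)[draws_tril_idx])     # labels repeated the same way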
@@ -4209,12 +4241,12 @@ class ObjectiveFunction(object):
         else:
             grad_n = self._concat_gradients(
                 (gr_f, gr_u, gr_s, gr_h, gr_hs, gr_d[:, None])) / Rlik  # (N,K)
-        grad_n = np.nan_to_num(grad_n, nan=0, posinf=10000, neginf=-10000)
-        grad_n = np.clip(grad_n, -1000, 1000)
+        grad_n = np.nan_to_num(grad_n, nan=0, posinf=1000, neginf=-1000)
+        grad_n = np.clip(grad_n, -100, 100)
         n = np.shape(grad_n)[0]
         # subtract out mean gradient value
-        # grad_n_sub = grad_n-(np.sum(grad_n, axis=0)/n)
-        # grad_n = grad_n_sub
+        grad_n_sub = grad_n - (np.sum(grad_n, axis=0) / n)
+        grad_n = grad_n_sub
         grad = grad_n.sum(axis=0)
         return grad, grad_n
 
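This hunk tightens the gradient safeguards (clip at ±100 rather than ±1000) and re-enables mean-centring of the per-observation gradients. A toy illustration of the three steps (values invented); note that the centred rows sum to zero by construction, so the summed grad vanishes here:

import numpy as np

grad_n = np.array([[np.nan, 250.0],
                   [2.0, -3.0],
                   [1.0, 5.0]])                 # (N, K) per-observation gradients
grad_n = np.nan_to_num(grad_n, nan=0, posinf=1000, neginf=-1000)
grad_n = np.clip(grad_n, -100, 100)             # [[0, 100], [2, -3], [1, 5]]
n = grad_n.shape[0]
grad_n = grad_n - (np.sum(grad_n, axis=0) / n)  # subtract the mean gradient
print(grad_n.sum(axis=0))                       # [0. 0.]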
@@ -4574,7 +4606,7 @@ class ObjectiveFunction(object):
         penalty = self.regularise_l2(betas)
 
         if not np.isreal(loglik):
-            loglik = -1000000000.0
+            loglik = -10000000.0
 
         output = (-loglik + penalty,)
         if return_gradient:
@@ -4817,7 +4849,7 @@ class ObjectiveFunction(object):
             proba.append(dev.to_cpu(proba_))
 
         lik = np.stack(proba).sum(axis=0) / R  # (N, )
-        lik = np.clip(lik, min_comp_val, 10000)
+        lik = np.clip(lik, min_comp_val, 1000)
         # lik = np.nan_to_num(lik, )
         loglik = np.log(lik)
         llf_main = loglik
@@ -5435,7 +5467,7 @@ class ObjectiveFunction(object):
 
 
         sol = Solution()
-        log_ll = 10.0 ** 9
+
         tol = {'ftol': 1e-8, 'gtol': 1e-6}
         is_delete = 0
         dispersion = mod.get('dispersion')
@@ -5793,7 +5825,7 @@ class ObjectiveFunction(object):
         initial_fit_beta = betas_est.x
         parmas = np.append(initial_fit_beta, nb_parma)
         self.nb_parma = nb_parma
-        print(f'neg binomi,{self.nb_parma}')
+        # print(f'neg binomi,{self.nb_parma}')
         betas_est = self._minimize(self._loglik_gradient, initial_fit_beta, args=(
             X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
             self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
@@ -5801,7 +5833,7 @@ class ObjectiveFunction(object):
             options={'gtol': tol['gtol']}, bounds=bounds,
             hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
 
-        print('refit with estimation of NB')
+        # print('refit with estimation of NB')
         # self.numerical_hessian_calc = True
         if self.numerical_hessian_calc:
             try:
@@ -6184,6 +6216,7 @@ class ObjectiveFunction(object):
             df_test[:, :, idx], model_nature.get('transformations')[idx] = self.transformer(
                 t, idx, df_test[:, :, idx])
             if np.max(df_tf[:, :, idx]) >= 77000:
+                # TODO need to normalise the data
 
                 print('should not be possible')
@@ -6242,7 +6275,7 @@ class ObjectiveFunction(object):
         model_nature['XH'] = XH
         X_test = None
         if np.isin(X, [np.inf, -np.inf, None, np.nan]).any():  # type: ignore
-            raise Exception('there is some kind of error')
+            raise Exception('there is some kind of error in X')
 
         # numpy data setup for estimation
         indices2 = self.get_named_indices(self.rdm_fit)
@@ -6393,6 +6426,53 @@ class ObjectiveFunction(object):
 
         return obj_1, model_nature
 
+    def get_X_tril(self):
+        '''For correlations find the repeating terms'''
+        varnames = self.none_join([self.rdm_grouped_fit, self.rdm_fit, self.rdm_cor_fit])
+        rv_count_all = 0
+        chol_count = 0
+        rv_count = 0
+        corr_indices = []
+        rv_indices = []
+        for ii, var in enumerate(varnames):  # TODO: BUGFIX
+            if var in self.none_handler(self.rdm_cor_fit):
+                is_correlated = True
+            else:
+                is_correlated = False
+
+            rv_count_all += 1
+            if is_correlated:
+                chol_count += 1
+            else:
+                rv_count += 1
+
+            if var in self.none_handler(self.rdm_cor_fit):
+                corr_indices.append(rv_count_all - 1)  # TODO: what does this do
+            else:
+                rv_indices.append(rv_count_all - 1)
+
+        # for s.d.: gr_w = (obs. prob. minus predicted probability) * obs. var * random draw
+        draws_tril_idx = np.array([corr_indices[j]
+                                   for i in range(len(self.none_handler(self.rdm_cor_fit)))
+                                   for j in range(i + 1)])  # varnames pos.
+        X_tril_idx = np.array([corr_indices[i]
+                               for i in range(len(self.none_handler(self.rdm_cor_fit)))
+                               for j in range(i + 1)])
+        # Find the s.d. for random variables that are not correlated
+        var_uncor = self.none_join([self.rdm_grouped_fit, self.rdm_fit])
+        range_var = [x for x in range(len(self.none_handler(var_uncor)))]
+        range_var = sorted(range_var)
+        draws_tril_idx = np.array(np.concatenate((range_var, draws_tril_idx)))
+        X_tril_idx = np.array(np.concatenate((range_var, X_tril_idx)))
+        draws_tril_idx = draws_tril_idx.astype(int)
+        X_tril_idx = X_tril_idx.astype(int)
+        return X_tril_idx
+
+
     def modifyn(self, data):
         select_data = self._characteristics_names
         alpha = np.isin(select_data, [item.split(':')[0] for item in data['fixed_fit']]).astype(int).tolist()
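The two comprehensions in get_X_tril enumerate the (row, column) pattern of a lower-triangular Cholesky factor: X_tril_idx repeats the row variable, draws_tril_idx walks the columns. A worked example, assuming two uncorrelated random parameters (indices 0-1) and three correlated ones (indices 2-4):

import numpy as np

corr_indices = [2, 3, 4]
n_corr = len(corr_indices)
draws_tril_idx = np.array([corr_indices[j] for i in range(n_corr) for j in range(i + 1)])
X_tril_idx = np.array([corr_indices[i] for i in range(n_corr) for j in range(i + 1)])

range_var = [0, 1]                    # uncorrelated params pair one-to-one
draws_tril_idx = np.concatenate((range_var, draws_tril_idx)).astype(int)
X_tril_idx = np.concatenate((range_var, X_tril_idx)).astype(int)

print(draws_tril_idx)  # [0 1 2 2 3 2 3 4]
print(X_tril_idx)      # [0 1 2 3 3 4 4 4]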
@@ -6600,23 +6680,35 @@ class ObjectiveFunction(object):
         # N, D = draws.shape[0], draws.shape[1]
         N, R, Kr = draws.shape[0], draws.shape[2], draws.shape[1]
         der = dev.np.ones((N, Kr, R), dtype=draws.dtype)
-        if len(self.none_handler(self.rdm_cor_fit)) == 0:
-            Br_come_one = self.Br.copy()
-            # Br_come_one =
-        else:
 
-            Br_come_one = self.Br.copy()
         # betas_random = self._transform_rand_betas(betas, betas_std, draws)
         # todo make sure this works for ln and truncated normal
         if any(set(distribution).intersection(['ln_normal', 'tn_normal'])):
-            print('check this, intesection shouldn not happen for all')
+
+            # print('check this, intersection should not happen for all')
+
+            if der.shape[1] != draws.shape[1]:
+                print('why')
             Br_come_one = self._transform_rand_betas(betas, betas_std, draws)
+            if der.shape[1] != draws.shape[1]:
+                print('why')
+            # TODO need to get the structure of the rdms
             for k, dist_k in enumerate(distribution):
                 if dist_k == 'ln_normal':
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
                     der[:, k, :] = Br_come_one[:, k, :]
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
                 elif dist_k == 'tn_normal':
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
                     der[:, k, :] = 1 * (Br_come_one[:, k, :] > 0)
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
 
+        if der.shape[1] != draws.shape[1]:
+            print('why')
         return der
 
     def _copy_size_display_as_ones(self, matrix):
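The der array holds the chain-rule factor for each transformed draw: for 'ln_normal' the derivative of exp(x) is exp(x), i.e. the transformed value itself, and for 'tn_normal' it is the indicator that the value is positive. A reduced sketch of just that loop (shapes and draws invented):

import numpy as np

N, K, R = 2, 2, 3
rng = np.random.default_rng(1)
Br_come_one = rng.standard_normal((N, K, R))    # stand-in for transformed betas

der = np.ones((N, K, R))
for k, dist_k in enumerate(['ln_normal', 'tn_normal']):
    if dist_k == 'ln_normal':
        der[:, k, :] = Br_come_one[:, k, :]            # d/dx exp(x) = exp(x)
    elif dist_k == 'tn_normal':
        der[:, k, :] = 1 * (Br_come_one[:, k, :] > 0)  # truncation indicator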
metacountregressor.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: metacountregressor
- Version: 0.1.118
+ Version: 0.1.120
  Summary: Extensions for a Python package for estimation of count models.
  Home-page: https://github.com/zahern/CountDataEstimation
  Author: Zeke Ahern