metacountregressor 0.1.108__py3-none-any.whl → 0.1.119__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -69,15 +69,20 @@ def main(args, **kwargs):
69
69
  #data_info['data']['Group'][0]
70
70
  #data_info['data']['Panel'][0]
71
71
  args['decisions'] = data_info['analyst']
72
-
73
- if not np.isnan(data_info['data']['Grouped'][0]):
72
+ grouped_c = data_info['data']['Grouped'][0]
73
+ if isinstance(data_info['data']['Grouped'][0],str):
74
74
  args['group'] = data_info['data']['Grouped'][0]
75
- args['ID'] = data_info['data']['Grouped'][0]
76
- if not np.isnan(data_info['data']['Panel'][0]):
75
+ args['ID'] = data_info['data']['Panel'][0]
76
+ if isinstance(data_info['data']['Panel'][0],str):
77
77
  args['panels'] = data_info['data']['Panel'][0]
78
78
 
79
79
  df = pd.read_csv(str(data_info['data']['Problem'][0]))
80
80
  x_df = df.drop(columns=[data_info['data']['Y'][0]])
81
+ # drop the columns of x_df where column is string exclude the column stype args['group']
82
+ exclude_column = args['group']
83
+ columns_to_keep = x_df.dtypes != 'object'
84
+ columns_to_keep |= (x_df.columns == exclude_column)
85
+ x_df = x_df.loc[:, columns_to_keep]
81
86
  y_df = df[[data_info['data']['Y'][0]]]
82
87
  y_df.rename(columns={data_info['data']['Y'][0]: "Y"}, inplace=True)
83
88
 
@@ -41,6 +41,12 @@ def process_arguments():
41
41
  'hyper': hyper}
42
42
  return new_data
43
43
 
44
+ def process_package_argumemnts():
45
+
46
+ new_data = {}
47
+ pass
48
+
49
+
44
50
  def main(args, **kwargs):
45
51
  '''METACOUNT REGRESSOR TESTING ENVIRONMENT'''
46
52
 
@@ -162,8 +168,8 @@ def main(args, **kwargs):
162
168
  'rdm_cor_terms': [],
163
169
  'grouped_terms': [],
164
170
  'hetro_in_means': [],
165
- 'transformations': ['no', 'log', 'log', 'no', 'no', 'no', 'no'],
166
- 'dispersion': 1
171
+ 'transformations': ['no', 'log', 'no', 'no', 'no', 'no', 'no'],
172
+ 'dispersion': 0
167
173
  }
168
174
 
169
175
  keep = ['Constant', 'US', 'RSMS', 'MCV', 'RSHS', 'AADT', 'Curve50', 'Offset']
@@ -172,13 +178,27 @@ def main(args, **kwargs):
172
178
  elif dataset == 4:
173
179
  manual_fit_spec = {
174
180
  'fixed_terms': ['const', 'LOWPRE', 'GBRPM', 'FRICTION'],
175
- 'rdm_terms': ['Expose:normal', 'INTPM:normal', 'CPM:normal', 'HISNOW:normal'],
181
+ 'rdm_terms': ['EXPOSE:normal', 'INTPM:normal', 'CPM:normal', 'HISNOW:normal'],
182
+ 'rdm_cor_terms': [],
183
+ 'grouped_terms': [],
184
+ 'hetro_in_means': [],
185
+ 'transformations': ['no', 'no', 'no', 'no', 'no', 'no', 'no', 'no'],
186
+ 'dispersion': 1
187
+ }
188
+ '''
189
+ manual_fit_spec = {
190
+ 'fixed_terms': ['const', 'LOWPRE', 'GBRPM', 'FRICTION', 'EXPOSE', 'INTPM', 'CPM', 'HISNOW'],
191
+ 'rdm_terms': [],
176
192
  'rdm_cor_terms': [],
177
193
  'grouped_terms': [],
178
194
  'hetro_in_means': [],
179
195
  'transformations': ['no', 'no', 'no', 'no', 'no', 'no', 'no', 'no'],
180
196
  'dispersion': 1
181
197
  }
198
+ '''
199
+
200
+
201
+ '''
182
202
  print('overriding this delete, just want to test the NB')
183
203
  manual_fit_spec = {
184
204
  'fixed_terms': ['const'],
@@ -189,7 +209,7 @@ def main(args, **kwargs):
189
209
  'transformations': ['no'],
190
210
  'dispersion': 1
191
211
  }
192
-
212
+ '''
193
213
  df = pd.read_csv('./data/Ex-16-3.csv') # read in the data
194
214
  y_df = df[['FREQ']].copy() # only consider crashes
195
215
  y_df.rename(columns={"FREQ": "Y"}, inplace=True)
@@ -262,6 +282,17 @@ def main(args, **kwargs):
262
282
  x_df = helperprocess.interactions(x_df, drop_this_perc=0.8)
263
283
  x_df['county'] = group_grab
264
284
 
285
+ print('benchmark specification')
286
+ manual_fit_spec = {
287
+ 'fixed_terms': ['const', 'monthly_AADT', 'segment_length', 'speed', 'paved_shoulder', 'curve'],
288
+ 'rdm_terms': [],
289
+ 'rdm_cor_terms': [],
290
+ 'grouped_terms': ['DP01:normal', 'DX32:normal'],
291
+ 'hetro_in_means': [],
292
+ 'transformations': ['no', 'no', 'no', 'no', 'no', 'no'],
293
+ 'dispersion': 0
294
+ }
295
+
265
296
  elif dataset == 9:
266
297
  df = pd.read_csv('panel_synth.csv') # read in the data
267
298
  y_df = df[['Y']].copy() # only consider crashes
@@ -286,6 +317,8 @@ def main(args, **kwargs):
286
317
  keep = ['group', 'constant', 'element_ID']
287
318
 
288
319
  x_df = helperprocess.interactions(x_df, keep)
320
+
321
+
289
322
  else: # the dataset has been selected in the program as something else
290
323
  data_info = process_arguments()
291
324
  data_info['hyper']
@@ -442,11 +475,11 @@ if __name__ == '__main__':
442
475
  if "-algorithm" in action.option_strings:
443
476
  parser._optionals._actions[i].help = "optimization algorithm"
444
477
 
445
- override = True
478
+ override = False
446
479
  if override:
447
- print('todo turn off, in testing phase')
480
+ print('WARNING: TESTING ENVIRONMENT, TURN OFF FOR RELEASE')
448
481
  parser.add_argument('-problem_number', default='10')
449
- print('did it make it')
482
+
450
483
  if 'algorithm' not in args:
451
484
  parser.add_argument('-algorithm', type=str, default='hs',
452
485
  help='optimization algorithm')
@@ -30,7 +30,7 @@ from scipy.special import gammaln
30
30
  from sklearn.metrics import mean_absolute_error as MAE
31
31
  from sklearn.metrics import mean_squared_error as MSPE
32
32
  from statsmodels.tools.numdiff import approx_fprime, approx_hess
33
- from sklearn.preprocessing import StandardScaler
33
+ from sklearn.preprocessing import StandardScaler, MinMaxScaler
34
34
  from texttable import Texttable
35
35
 
36
36
  try:
@@ -42,7 +42,7 @@ except ImportError:
42
42
  from pareto_file import Pareto, Solution
43
43
  from data_split_helper import DataProcessor
44
44
 
45
-
45
+ from scipy import stats
46
46
  np.seterr(divide='ignore', invalid='ignore')
47
47
  warnings.simplefilter("ignore")
48
48
 
@@ -124,10 +124,11 @@ class ObjectiveFunction(object):
124
124
 
125
125
  self.reg_penalty = 0
126
126
  self.power_up_ll = False
127
-
127
+ self.nb_parma = 1
128
128
  self.bic = None
129
129
  self.other_bic = False
130
130
  self.test_flag = 1
131
+ self.no_extra_param =1 #if true, fix dispersion. w
131
132
  if self.other_bic:
132
133
  print('change this to false latter ')
133
134
 
@@ -138,7 +139,7 @@ class ObjectiveFunction(object):
138
139
  self.verbose_safe = True
139
140
  self.please_print = kwargs.get('please_print', 0)
140
141
  self.group_halton = None
141
- self.grad_yes = False
142
+ self.grad_yes = True
142
143
  self.hess_yes = False
143
144
  self.group_halton_test = None
144
145
  self.panels = None
@@ -174,8 +175,10 @@ class ObjectiveFunction(object):
174
175
  self._panels = None
175
176
  self.is_multi = True
176
177
  self.method_ll = 'Nelder-Mead-BFGS'
178
+
177
179
  self.method_ll = 'L-BFGS-B' # alternatives 'BFGS_2', 'BFGS
178
180
  self.method_ll = 'BFGS_2'
181
+ #self.method_ll = 'Nelder-Mead-BFGS'
179
182
  self.Keep_Fit = 2
180
183
  self.MP = 0
181
184
  # Nelder-Mead-BFGS
@@ -214,6 +217,11 @@ class ObjectiveFunction(object):
214
217
  self._maximize = False # do we maximize or minimize?
215
218
 
216
219
  x_data = sm.add_constant(x_data)
220
+ standardize_the_data = 0
221
+ if standardize_the_data:
222
+ print('we are standardize the data')
223
+ x_data = self.self_standardize_positive(x_data)
224
+
217
225
  self._input_data(x_data, y_data)
218
226
 
219
227
 
@@ -232,7 +240,7 @@ class ObjectiveFunction(object):
232
240
  if self.test_percentage == 0:
233
241
  self.is_multi = False
234
242
 
235
- if 'panels' in kwargs and not np.isnan(kwargs.get('panels')):
243
+ if 'panels' in kwargs and not (kwargs.get('panels') == None):
236
244
  self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)
237
245
 
238
246
  x_data[kwargs['group']] = x_data[kwargs['group']].astype(
@@ -275,11 +283,11 @@ class ObjectiveFunction(object):
275
283
 
276
284
  #self.n_obs = N
277
285
  self._characteristics_names = list(self._x_data.columns)
278
- self._max_group_all_means = 1
286
+ self._max_group_all_means = 2
279
287
 
280
288
  exclude_this_test = [4]
281
289
 
282
- if 'panels' in kwargs and not np.isnan(kwargs.get('panels')):
290
+ if 'panels' in kwargs and not (kwargs.get('panels') == None):
283
291
  self.panels = np.asarray(df_train[kwargs['panels']])
284
292
  self.panels_test = np.asarray(df_test[kwargs['panels']])
285
293
  self.ids = np.asarray(
@@ -295,6 +303,8 @@ class ObjectiveFunction(object):
295
303
  self.group_halton = group.copy()
296
304
  self.group_dummies = pd.get_dummies(group)
297
305
  Xnew, Ynew, panel_info = self._balance_panels(X, Y, panel)
306
+
307
+ Xnew = pd.DataFrame(Xnew, columns=X.columns)
298
308
  self.panel_info = panel_info
299
309
  self.N, self.P = panel_info.shape
300
310
  Xnew.drop(kwargs['panels'], axis=1, inplace=True)
@@ -385,7 +395,7 @@ class ObjectiveFunction(object):
385
395
 
386
396
 
387
397
 
388
- self.Ndraws = 200 # todo: change back
398
+ self.Ndraws = 1400 # todo: change back
389
399
  self.draws1 = None
390
400
  self.initial_sig = 1 # pass the test of a single model
391
401
  self.pvalue_sig_value = .1
@@ -408,7 +418,7 @@ class ObjectiveFunction(object):
408
418
  # self._transformations = ["no", "sqrt", "log", "exp", "fact", "arcsinh", 2, 3]
409
419
  self._transformations = ["no", "sqrt", "log", "arcsinh"]
410
420
  self._transformations = kwargs.get('_transformation', ["no", "sqrt", "log", 'arcsinh'])
411
-
421
+ self._transformations = kwargs.get('_transformation', ["no", "log", "sqrt", "arcsinh"])
412
422
  # self._distribution = ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal', 'lindley']
413
423
 
414
424
  self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal'])
@@ -766,6 +776,8 @@ class ObjectiveFunction(object):
766
776
  if dispersion == 0:
767
777
  return None, None
768
778
  elif dispersion == 2 or dispersion == 1:
779
+ if self.no_extra_param:
780
+ return self.nb_parma, None
769
781
  return betas[-1], None
770
782
 
771
783
  elif dispersion == 3:
@@ -817,6 +829,8 @@ class ObjectiveFunction(object):
817
829
  distro = ast.literal_eval(extra.iloc[matched_index, 7].values.tolist()[0])
818
830
  distro = self.rename_distro(distro)
819
831
  set_alpha = set_alpha+[distro]
832
+ elif col == 'const':
833
+ set_alpha = set_alpha +[['normal']]
820
834
  return set_alpha
821
835
  return [[x for x in self._distribution]] * self._characteristics
822
836
 
@@ -897,10 +911,12 @@ class ObjectiveFunction(object):
897
911
  return ([self._model_type_codes[dispersion]])
898
912
 
899
913
  def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None, obj_1=None, model_nature=None):
900
- r'''
914
+ '''
901
915
  setup for naming of the model summary
902
916
  '''
917
+ if self.no_extra_param and dispersion ==1:
903
918
 
919
+ betas = np.append(betas, self.nb_parma)
904
920
 
905
921
  self.name_deleter = []
906
922
  group_rpm = None
@@ -1025,7 +1041,7 @@ class ObjectiveFunction(object):
1025
1041
  try:
1026
1042
  if len(betas) != len(names):
1027
1043
  print('no draws is', no_draws)
1028
- print('fix_theano')
1044
+
1029
1045
  except Exception as e:
1030
1046
  print(e)
1031
1047
 
@@ -1052,7 +1068,8 @@ class ObjectiveFunction(object):
1052
1068
  if not isinstance(self.pvalues, np.ndarray):
1053
1069
  raise Exception
1054
1070
 
1055
-
1071
+ if 'nb' in self.coeff_names and self.no_extra_param:
1072
+ self.pvalues = np.append(self.pvalues,0)
1056
1073
 
1057
1074
  if self.please_print or save_state:
1058
1075
 
@@ -1068,17 +1085,22 @@ class ObjectiveFunction(object):
1068
1085
 
1069
1086
  if solution is not None:
1070
1087
  print(f"{self._obj_2}: {self.round_with_padding(solution[self._obj_2], 2)}")
1071
-
1088
+
1072
1089
  self.pvalues = [self.round_with_padding(
1073
1090
  x, 2) for x in self.pvalues]
1074
1091
  signif_list = self.pvalue_asterix_add(self.pvalues)
1075
1092
  if model == 1:
1076
1093
 
1077
- self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
1078
- if self.coeff_[-1] < 0.25:
1094
+ #self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
1095
+ if self.no_extra_param:
1096
+ self.coeff_ = np.append(self.coeff_, self.nb_parma)
1097
+ self.stderr = np.append(self.stderr, 0.00001)
1098
+ self.zvalues = np.append(self.zvalues, 50)
1099
+
1100
+ elif self.coeff_[-1] < 0.25:
1079
1101
  print(self.coeff_[-1], 'Warning Check Dispersion')
1080
1102
  print(np.exp(self.coeff_[-1]))
1081
- self.coeff_[-1] = np.exp(self.coeff_[-1]) # min possible value for negbinom
1103
+ #self.coeff_[-1] = np.exp(self.coeff_[-1]) # min possible value for negbinom
1082
1104
 
1083
1105
  self.coeff_ = [self.round_with_padding(x, 2) for x in self.coeff_]
1084
1106
 
@@ -1301,6 +1323,7 @@ class ObjectiveFunction(object):
1301
1323
 
1302
1324
  if 'AADT' in self._characteristics_names[col]:
1303
1325
  new_transform = [['log']]
1326
+ #new_transform = [['no']]
1304
1327
  transform_set = transform_set + new_transform
1305
1328
 
1306
1329
  elif all(x_data[col] <= 5):
@@ -1340,6 +1363,18 @@ class ObjectiveFunction(object):
1340
1363
 
1341
1364
  return transform_set
1342
1365
 
1366
+ def poisson_mean_get_dispersion(self, betas, X, y):
1367
+ eVy = self._loglik_gradient(betas, X, y, None, X, None, None, False, False, dispersion=0,
1368
+ return_EV=True,
1369
+ zi_list=None, draws_grouped=None, Xgroup=None)
1370
+
1371
+ ab = ((y - eVy)**2 - eVy)/eVy
1372
+ bb = eVy -1
1373
+ disp = sm.OLS(ab.ravel(), bb.ravel()).fit()
1374
+ gamma = disp.params[0]
1375
+ print(f'dispersion is {gamma}')
1376
+ return gamma
1377
+
1343
1378
  def validation(self, betas, y, X, Xr=None, dispersion=0, rdm_cor_fit=None, zi_list=None, exog_infl=None,
1344
1379
  model_nature=None, halton=1, testing=1, validation=0):
1345
1380
  'validation if mu needs to be calculated'
@@ -1373,7 +1408,7 @@ class ObjectiveFunction(object):
1373
1408
  XG = model_nature.get('XGtest')[:total_percent, :, :]
1374
1409
  else:
1375
1410
  XG = model_nature.get('XGtest')[total_percent:, :, :]
1376
- print('chekc this is doing it wright')
1411
+
1377
1412
  else:
1378
1413
  if 'XG' in model_nature:
1379
1414
  XG = model_nature.get('XG')
@@ -1495,7 +1530,7 @@ class ObjectiveFunction(object):
1495
1530
  5: herogeneity_in _means
1496
1531
 
1497
1532
 
1498
- a: how to transofrm the original data
1533
+ a: how to transform the original data
1499
1534
  b: grab dispersion '''
1500
1535
 
1501
1536
  # todo: better way
@@ -1843,7 +1878,10 @@ class ObjectiveFunction(object):
1843
1878
  elif dispersion == 4:
1844
1879
  return 2
1845
1880
  else:
1846
- return 1
1881
+ if self.no_extra_param:
1882
+ return 0
1883
+ else:
1884
+ return 1
1847
1885
 
1848
1886
  def get_pvalue_info_alt(self, pvalues, names, sig_value=0.05, dispersion=0, is_halton=1, delete=0,
1849
1887
  return_violated_terms=0):
@@ -1858,6 +1896,7 @@ class ObjectiveFunction(object):
1858
1896
 
1859
1897
  else:
1860
1898
  slice_this_amount = self.num_dispersion_params(dispersion)
1899
+ slice_this_amount = 1 #TODO handle this
1861
1900
  if pvalues[-1] > sig_value:
1862
1901
  vio_counts += 1
1863
1902
  subpvalues = pvalues[:-slice_this_amount].copy()
@@ -3502,21 +3541,38 @@ class ObjectiveFunction(object):
3502
3541
  # if gamma <= 0.01: #min defined value for stable nb
3503
3542
  # gamma = 0.01
3504
3543
 
3544
+ #g = stats.gamma.rvs(gamma, scale = lam/gamma, size = 1.0 / gamma * lam ** Q )
3505
3545
 
3546
+ #gg = stats.poisson.rvs(g)
3506
3547
 
3548
+
3507
3549
 
3550
+
3508
3551
  endog = y
3509
3552
  mu = lam
3553
+ ''''
3554
+ mu = lam*np.exp(gamma) #TODO check that this does not need to be multiplied
3510
3555
  alpha = np.exp(gamma)
3511
- #size = 1.0 / alpha * mu ** Q
3512
- alpha_size = alpha * mu ** Q
3513
- # prob = size/(size+mu)
3514
- prob = alpha / (alpha + mu)
3515
- # prob = 1/(1+mu*alpha)
3556
+
3557
+ '''
3558
+ alpha = gamma
3559
+ size = 1.0 / alpha * mu ** Q
3560
+
3561
+ prob = size/(size+mu)
3562
+
3563
+
3516
3564
 
3517
3565
  '''test'''
3518
3566
 
3519
3567
 
3568
+ '''
3569
+ size = 1 / np.exp(gamma) * mu ** 0
3570
+ prob = size / (size + mu)
3571
+ coeff = (gammaln(size + y) - gammaln(y + 1) -
3572
+ gammaln(size))
3573
+ llf = coeff + size * np.log(prob) + y * np.log(1 - prob)
3574
+ '''
3575
+
3520
3576
  try:
3521
3577
  # print(np.shape(y),np.shape(size), np.shape(prob))
3522
3578
  #gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)
@@ -3528,22 +3584,28 @@ class ObjectiveFunction(object):
3528
3584
  #start_time = time.time()
3529
3585
  #for _ in range(10000):
3530
3586
 
3531
- #gg = self.negbinom_pmf(alpha_size, prob, y)
3587
+
3532
3588
  #end_time = time.time()
3533
3589
  #print("Custom functieon time:", end_time - start_time)
3534
3590
  #start_time = time.time()
3535
3591
  #for _ in range(10000):
3592
+ '''
3536
3593
  gg = np.exp(
3537
3594
  gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha) + y * np.log(mu) + alpha * np.log(alpha) - (
3538
3595
  y + alpha) * np.log(mu + alpha))
3539
3596
  gg[np.isnan(gg)] = 1
3597
+ '''
3598
+ gg_alt = nbinom.pmf(y ,1/alpha, prob)
3599
+ #gg_alt_2 = (gammaln(size + y) - gammaln(y + 1) -
3600
+ #gammaln(size)) + size * np.log(prob) + y * np.log(1 - prob)
3601
+ #print('check theses')
3540
3602
  #gg = nbinom.pmf(y ,alpha, prob)
3541
3603
  #end_time = time.time()
3542
3604
  #print("Custom functieon time:", end_time - start_time)
3543
3605
 
3544
3606
  except Exception as e:
3545
3607
  print(e)
3546
- return gg
3608
+ return gg_alt
3547
3609
 
3548
3610
  def lindley_pmf(self, x, r, theta, k=50):
3549
3611
  """
@@ -3690,8 +3752,8 @@ class ObjectiveFunction(object):
3690
3752
 
3691
3753
  if dispersion == 1 or dispersion == 4: # nb
3692
3754
  # if model_nature is not None and 'dispersion_penalty' in model_nature:
3693
-
3694
-
3755
+ #b_gam = 1/np.exp(b_gam)
3756
+ #print(b_gam)
3695
3757
  if b_gam <= 0:
3696
3758
  #penalty += 100
3697
3759
  #penalty += abs(b_gam)
@@ -3699,9 +3761,9 @@ class ObjectiveFunction(object):
3699
3761
  #b_gam = 1
3700
3762
 
3701
3763
  # if b_gam < 0.03:
3702
- penalty += min(1, np.abs(b_gam))
3764
+ penalty += min(1, np.abs(b_gam), 0)
3703
3765
 
3704
- b_gam = 0.001
3766
+ #b_gam = 0.001
3705
3767
  #
3706
3768
 
3707
3769
  #if b_gam >= 10:
@@ -3733,8 +3795,15 @@ class ObjectiveFunction(object):
3733
3795
  def eXB_calc(self, params_main, Xd, offset, dispersion, b_gam=None):
3734
3796
 
3735
3797
  # print('this was 0')
3736
- eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
3798
+ if dispersion:
3799
+ eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
3800
+
3801
+ #eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])+dispersion
3802
+ #print('check if this holds size')
3803
+ else:
3804
+ eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
3737
3805
  eta = np.array(eta)
3806
+
3738
3807
  # eta = np.float64(eta)
3739
3808
  # eta = np.dot(Xd, params_main)+offset[:,:,0]
3740
3809
  # eta2 = np.dot(Xd, params_main)[:,:,None]+np.array(offset[:,:,:])
@@ -3907,6 +3976,8 @@ class ObjectiveFunction(object):
3907
3976
  if dispersion == 0 or dispersion == 3:
3908
3977
  return 0
3909
3978
  else:
3979
+
3980
+
3910
3981
  return 1
3911
3982
 
3912
3983
  def _prob_product_across_panels(self, pch, panel_info):
@@ -3962,7 +4033,7 @@ class ObjectiveFunction(object):
3962
4033
  if y[i] == 0:
3963
4034
  gr_e[i] = 0
3964
4035
 
3965
- if self.is_dispersion(dispersion):
4036
+ if self.is_dispersion(dispersion) and not self.no_extra_param:
3966
4037
  gr_d = np.zeros((N, 1))
3967
4038
  if dispersion == 1:
3968
4039
  # trying alt
@@ -4067,9 +4138,9 @@ class ObjectiveFunction(object):
4067
4138
  dprod_r = dev.np.einsum("njk,njr -> nkr", Xdr,
4068
4139
  einsum_model_form, dtype=np.float64) # (N,K,R)
4069
4140
  der_prod_r = dprod_r * der * proba_n[:, None, :] # (N,K,R)
4070
- der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :] # I think this is the case check
4141
+ #der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :] # I think this is the case check
4071
4142
  der_prod_r = dprod_r[:, X_tril_idx, :] * der * proba_n[:, None, :] # or this one
4072
- print('which one of these')
4143
+ #print('which one of these')
4073
4144
  der_t = self._compute_derivatives(
4074
4145
  br, draws_[:, draws_tril_idx, :], brstd, self.dist_fit) # (N,K,R)
4075
4146
  # er_t = self._compute_derivatives(br, draws_, brstd[:, draws_tril_idx,: ], self.dist_fit, draws_tril_idx)
@@ -4132,8 +4203,12 @@ class ObjectiveFunction(object):
4132
4203
  grad_n = self._concat_gradients(
4133
4204
  (gr_f, gr_u, gr_s, gr_e)) / Rlik # (N,K)
4134
4205
  else:
4135
- grad_n = self._concat_gradients(
4136
- (gr_f, gr_u, gr_s, gr_h, gr_hs, gr_d[:, None])) / Rlik # (N,K)
4206
+ if self.no_extra_param:
4207
+ grad_n = self._concat_gradients(
4208
+ (gr_f, gr_u, gr_s, gr_h, gr_hs)) / Rlik # (N,K)
4209
+ else:
4210
+ grad_n = self._concat_gradients(
4211
+ (gr_f, gr_u, gr_s, gr_h, gr_hs, gr_d[:, None])) / Rlik # (N,K)
4137
4212
  grad_n = np.nan_to_num(grad_n, nan=0, posinf=10000, neginf=-10000)
4138
4213
  grad_n = np.clip(grad_n, -1000, 1000)
4139
4214
  n = np.shape(grad_n)[0]
@@ -4290,7 +4365,7 @@ class ObjectiveFunction(object):
4290
4365
  return proba_r.sum(axis=1), np.squeeze(proba_r)
4291
4366
 
4292
4367
  def _penalty_betas(self, betas, dispersion, penalty, penalty_ap=100.0):
4293
- penalty_val = 0.05
4368
+ penalty_val = 0.1
4294
4369
  penalty_val_max = 130
4295
4370
 
4296
4371
  # print('change_later')
@@ -4306,8 +4381,8 @@ class ObjectiveFunction(object):
4306
4381
  if abs(i) > penalty_val_max:
4307
4382
  penalty += abs(i)
4308
4383
 
4309
- # if abs(i) < penalty_val:
4310
- # penalty += 5
4384
+ #if abs(i) < penalty_val:
4385
+ # penalty += 5
4311
4386
 
4312
4387
  # penalty = 0
4313
4388
  return penalty
@@ -4414,8 +4489,7 @@ class ObjectiveFunction(object):
4414
4489
  index += 1
4415
4490
 
4416
4491
  brstd = br_std
4417
- print(brstd)
4418
- print(brstd)
4492
+
4419
4493
 
4420
4494
 
4421
4495
  def _loglik_gradient(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False,
@@ -4447,7 +4521,7 @@ class ObjectiveFunction(object):
4447
4521
  penalty = self._penalty_betas(
4448
4522
  betas, dispersion, penalty, float(len(y) / 10.0))
4449
4523
  self.n_obs = len(y) # feeds into gradient
4450
- if draws is None and draws_grouped is None and (
4524
+ if draws is None and draws_grouped is None and (model_nature is None or
4451
4525
  'draws_hetro' not in model_nature or model_nature.get('draws_hetro').shape[1] == 0):
4452
4526
  #TODO do i shuffle the draws
4453
4527
  if type(Xd) == dict:
@@ -4594,7 +4668,9 @@ class ObjectiveFunction(object):
4594
4668
  Kf = 0
4595
4669
  else:
4596
4670
  if n_coeff != len(betas):
4597
- raise Exception
4671
+ raise Exception(
4672
+
4673
+ )
4598
4674
  Bf = betas[0:Kf] # Fixed betas
4599
4675
 
4600
4676
 
@@ -4696,7 +4772,8 @@ class ObjectiveFunction(object):
4696
4772
  eVd = self.lam_transform(eVd, dispersion, betas[-1])
4697
4773
 
4698
4774
  if self.is_dispersion(dispersion):
4699
- penalty, betas[-1] = self._penalty_dispersion(
4775
+ if not self.no_extra_param:
4776
+ penalty, betas[-1] = self._penalty_dispersion(
4700
4777
  dispersion, betas[-1], eVd, y, penalty, model_nature)
4701
4778
 
4702
4779
  '''
@@ -5341,7 +5418,7 @@ class ObjectiveFunction(object):
5341
5418
  return a
5342
5419
 
5343
5420
  def fitRegression(self, mod,
5344
- dispersion=0, maxiter=2000, batch_size=None, num_hess=False):
5421
+ dispersion=0, maxiter=2000, batch_size=None, num_hess=False, **kwargs):
5345
5422
 
5346
5423
  """
5347
5424
  Fits a poisson regression given data and outcomes if dispersion is not declared
@@ -5387,6 +5464,8 @@ class ObjectiveFunction(object):
5387
5464
  _g, pg, kg = 0, 0, 0
5388
5465
 
5389
5466
  dispersion_param_num = self.is_dispersion(dispersion)
5467
+ if self.no_extra_param:
5468
+ dispersion_param_num =0
5390
5469
 
5391
5470
  #paramNum = self.get_param_num(dispersion)
5392
5471
  self.no_random_paramaters = 0
@@ -5441,17 +5520,26 @@ class ObjectiveFunction(object):
5441
5520
  else:
5442
5521
  bb[0] = self.constant_value
5443
5522
  if dispersion == 1:
5444
- bb[-1] = self.negative_binomial_value
5523
+ if not self.no_extra_param:
5524
+ bb[-1] = self.negative_binomial_value
5445
5525
  bounds = None
5446
5526
 
5527
+
5528
+
5447
5529
  # intial_beta = minimize(self._loglik_gradient, bb, args =(XX, y, None, None, None, None, calc_gradient, hess_est, dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod), method = 'nelder-mead', options={'gtol': 1e-7*len(XX)})
5448
5530
  hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
5449
- initial_beta = self._minimize(self._loglik_gradient, bb,
5531
+
5532
+ if self.no_extra_param:
5533
+ dispersion_poisson = 0
5534
+ initial_beta = self._minimize(self._loglik_gradient, bb,
5450
5535
  args=(XX, y, None, None, None, None, calc_gradient, hess_est,
5451
- dispersion, 0, False, 0, None, None, None, None, None,
5536
+ dispersion_poisson, 0, False, 0, None, None, None, None, None,
5452
5537
  mod),
5453
5538
  method=method2, tol=1e-5, options={'gtol': tol['gtol']},
5454
5539
  bounds=bounds)
5540
+ if dispersion:
5541
+ nb_parma = self.poisson_mean_get_dispersion(initial_beta.x, XX, y)
5542
+
5455
5543
 
5456
5544
 
5457
5545
 
@@ -5551,7 +5639,7 @@ class ObjectiveFunction(object):
5551
5639
 
5552
5640
  b = [b[i] if i > len(self.none_handler(self.fixed_fit)) + len(
5553
5641
  self.none_handler(self.rdm_fit)) + len(
5554
- self.none_handler(self.rdm_cor_fit)) else b[i] / 1.3 for i in range(len(b))]
5642
+ self.none_handler(self.rdm_cor_fit)) else b[i] / 1 for i in range(len(b))]
5555
5643
  else:
5556
5644
  b = bb
5557
5645
 
@@ -5561,9 +5649,10 @@ class ObjectiveFunction(object):
5561
5649
  else:
5562
5650
  b = np.insert(b, -1, np.random.uniform(0.05, 0.1))
5563
5651
  if dispersion == 1:
5564
- b[-1] = np.abs(b[-1])
5565
- if b[-1] > 10:
5566
- b[-1] = 5
5652
+ if not self.no_extra_param:
5653
+ b[-1] = np.abs(b[-1])
5654
+ if b[-1] > 10:
5655
+ b[-1] = 5
5567
5656
  elif dispersion == 2:
5568
5657
  b[-1] = .5
5569
5658
  if method == 'L-BFGS-B' or method2 == 'L-BFGS-B':
@@ -5689,13 +5778,30 @@ class ObjectiveFunction(object):
5689
5778
 
5690
5779
  if draws is None and draws_hetro is not None:
5691
5780
  print('hold')
5692
- betas_est = self._minimize(self._loglik_gradient, b, args=(
5693
- X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
5694
- self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
5695
- method=method2, tol=tol['ftol'],
5696
- options={'gtol': tol['gtol']}, bounds=bounds,
5697
- hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
5781
+ #self.grad_yes = True
5782
+ #self.hess_yes = True
5698
5783
 
5784
+ if self.no_extra_param:
5785
+ dispersion_poisson = 0
5786
+ betas_est = self._minimize(self._loglik_gradient, b, args=(
5787
+ X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion_poisson, 0, False, 0,
5788
+ self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
5789
+ method=method2, tol=tol['ftol'],
5790
+ options={'gtol': tol['gtol']}, bounds=bounds,
5791
+ hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
5792
+ if dispersion:
5793
+ initial_fit_beta = betas_est.x
5794
+ parmas = np.append(initial_fit_beta, nb_parma)
5795
+ self.nb_parma = nb_parma
5796
+ print(f'neg binomi,{self.nb_parma}')
5797
+ betas_est = self._minimize(self._loglik_gradient, initial_fit_beta, args=(
5798
+ X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
5799
+ self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
5800
+ method=method2, tol=tol['ftol'],
5801
+ options={'gtol': tol['gtol']}, bounds=bounds,
5802
+ hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
5803
+
5804
+ print('refit with estimation of NB')
5699
5805
  # self.numerical_hessian_calc = True
5700
5806
  if self.numerical_hessian_calc:
5701
5807
  try:
@@ -5994,7 +6100,7 @@ class ObjectiveFunction(object):
5994
6100
  return delim + self._model_type_codes[dispersion]
5995
6101
 
5996
6102
  def self_standardize_positive(self, X):
5997
- scaler = StandardScaler()
6103
+ scaler = MinMaxScaler()
5998
6104
  if type(X) == list:
5999
6105
  return X
6000
6106
 
@@ -6004,12 +6110,26 @@ class ObjectiveFunction(object):
6004
6110
  # Reshaping to 2D - combining the last two dimensions
6005
6111
  df_tf_reshaped = X.reshape(original_shape[0], -1)
6006
6112
  df_tf_scaled = scaler.fit_transform(df_tf_reshaped)
6007
- df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
6113
+ #df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
6008
6114
  # Reshape back to original 3D shape if necessary
6009
6115
  df_tf = df_tf_scaled.reshape(original_shape)
6010
6116
  return df_tf
6011
6117
  else:
6012
- raise X
6118
+ # Initialize the MinMaxScaler
6119
+ scaler = MinMaxScaler()
6120
+ float_columns = X.select_dtypes(include=['float64', 'float32', 'int']).columns.difference(['const', 'offset, "EXPOSE', 'Constant', 'constant'])
6121
+ non_numeric_columns = X.select_dtypes(exclude=['float64', 'float32', 'int']).columns
6122
+
6123
+ # Fit the scaler to the float columns and transform them
6124
+ X[float_columns] = scaler.fit_transform(X[float_columns])
6125
+ # Fit the scaler to the data and transform it
6126
+ #scaled_data = scaler.fit_transform(X)
6127
+
6128
+ # Convert the result back to a DataFrame
6129
+ #scaled_df = pd.DataFrame(scaled_data, columns=X.columns)
6130
+
6131
+
6132
+ return X
6013
6133
 
6014
6134
  def make_regression_from_terms(self, fixed=None, rdm=None, rdm_cor_fit=None, distribution=None, dispersion=None,
6015
6135
  *args, **kwargs):
@@ -6065,7 +6185,7 @@ class ObjectiveFunction(object):
6065
6185
  t, idx, df_test[:, :, idx])
6066
6186
  if np.max(df_tf[:, :, idx]) >= 77000:
6067
6187
 
6068
- raise Exception('should not be possible')
6188
+ print('should not be possible')
6069
6189
 
6070
6190
  self.define_selfs_fixed_rdm_cor(model_nature)
6071
6191
  indices = self.get_named_indices(self.fixed_fit)
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: metacountregressor
3
- Version: 0.1.108
3
+ Version: 0.1.119
4
4
  Summary: Extensions for a Python package for estimation of count models.
5
5
  Home-page: https://github.com/zahern/CountDataEstimation
6
6
  Author: Zeke Ahern
@@ -9,9 +9,18 @@ License: QUT
9
9
  Requires-Python: >=3.10
10
10
  Description-Content-Type: text/markdown
11
11
  License-File: LICENSE.txt
12
- Requires-Dist: numpy >=1.13.1
13
- Requires-Dist: scipy >=1.0.0
12
+ Requires-Dist: numpy>=1.13.1
13
+ Requires-Dist: scipy>=1.0.0
14
14
  Requires-Dist: requests
15
+ Dynamic: author
16
+ Dynamic: author-email
17
+ Dynamic: description
18
+ Dynamic: description-content-type
19
+ Dynamic: home-page
20
+ Dynamic: license
21
+ Dynamic: requires-dist
22
+ Dynamic: requires-python
23
+ Dynamic: summary
15
24
 
16
25
  <div style="display: flex; align-items: center;">
17
26
  <img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width: 100px; margin-right: 20px;">
@@ -23,10 +32,24 @@ Requires-Dist: requests
23
32
 
24
33
  The tutorial provides more extensive examples on how to run the code and perform experiments. Further documentation is currently in development.
25
34
 
26
- ##### Quick Setup
35
+ # For an Application Setup Download the following GUI
36
+ [Download Application](https://github.com/zahern/MetaCount/tree/master/metacountregressor/application_gui/dist/meta_app)
37
+
38
+ The application involves setting up a problem instance to run the models.
39
+
40
+ ### The entire [Git Repository](https://github.com/zahern/MetaCount.git) is available to clone.
41
+ #### Steps
42
+ 1. Clone Project
43
+ 2. Navigate to "metacountregressor/application_gui/dist/meta_app"
44
+ 3. Run meta_app.exe
45
+ 4. Navigate to metacountregressor/app_main.py
46
+ 5. Run app_main.py
47
+
48
+
49
+ ## Setup For Python Package Approach
27
50
  The code below demonstrates how to set up automatic optimization assisted by the harmony search algorithm. References to Differential Evolution and Simulated Annealing are also mentioned (change accordingly).
28
51
 
29
- ## Quick install: Requires Python 3.10
52
+ ## Install: Requires Python 3.10
30
53
 
31
54
  Install `metacountregressor` using pip as follows:
32
55
 
@@ -1,20 +1,20 @@
1
1
  metacountregressor/__init__.py,sha256=UM4zaqoAcZVWyx3SeL9bRS8xpQ_iLZU9fIIARWmfjis,2937
2
2
  metacountregressor/_device_cust.py,sha256=759fnKmTYccJm4Lpi9_1reurh6OB9d6q9soPR0PltKc,2047
3
- metacountregressor/app_main.py,sha256=vdI_b_0GMYWMIUd3iN-Cak-xukM8uxNhaUggUl5SKPs,9695
3
+ metacountregressor/app_main.py,sha256=vY3GczTbGbBRalbzMkl_9jVW7RMgEOc6z2Dr1IZJv9c,10014
4
4
  metacountregressor/data_split_helper.py,sha256=M2fIMdIO8znUaYhx5wlacRyNWdQjNYu1z1wkE-kFUYU,3373
5
5
  metacountregressor/halton.py,sha256=jhovA45UBoZYU9g-hl6Lb2sBIx_ZBTNdPrpgkzR9fng,9463
6
6
  metacountregressor/helperprocess.py,sha256=Sc5gJ7ffFlkya5B5KQwE33xxXuIQyF6OaYtSikLa3pQ,12968
7
- metacountregressor/main.py,sha256=3prcbUriyMRqpIUsW4dt-X-qLV5mLGI4jA2uh9ZQDRM,19638
7
+ metacountregressor/main.py,sha256=lHrj2Hvj2hTGi-2mLSbuGEHDDILl4V-ml9e9Y_Hjpts,20560
8
8
  metacountregressor/main_old.py,sha256=eTS4ygq27MnU-dZ_j983Ucb-D5XfbVF8OJQK2hVVLZc,24123
9
9
  metacountregressor/metaheuristics.py,sha256=UyUmHyuQd5vZ2wdaVL0dWpJfBOBdtCAqcA0GlFzouH8,105849
10
10
  metacountregressor/pareto_file.py,sha256=whySaoPAUWYjyI8zo0hwAOa3rFk6SIUlHSpqZiLur0k,23096
11
11
  metacountregressor/pareto_logger__plot.py,sha256=mEU2QN4wmsM7t39GJ_XhJ_jjsdl09JOmG0U2jICrAkI,30037
12
12
  metacountregressor/setup.py,sha256=8w6IqX0tJsbYrOI1BJLIJCIvOnunKli5I9fsF5PhHv4,919
13
13
  metacountregressor/single_objective_finder.py,sha256=jVG7GJBqzSP4_riYr-kMMKy_LE3SlGmKMunNhHYxgRg,8011
14
- metacountregressor/solution.py,sha256=SzI3JItc7CzVcJOAOiztQJD0WMRaE0pw1ps55kP90tc,268893
14
+ metacountregressor/solution.py,sha256=4pfpDcH38eTIGV7DIDL5ZOyB_ND-iDzod964-RBhC5o,274195
15
15
  metacountregressor/test_generated_paper2.py,sha256=pwOoRzl1jJIIOUAAvbkT6HmmTQ81mwpsshn9SLdKOg8,3927
16
- metacountregressor-0.1.108.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
17
- metacountregressor-0.1.108.dist-info/METADATA,sha256=5fydEG4twDksNZ8hTHvTcXW2MjAjl6eTnroisrkzM4k,22686
18
- metacountregressor-0.1.108.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
19
- metacountregressor-0.1.108.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
20
- metacountregressor-0.1.108.dist-info/RECORD,,
16
+ metacountregressor-0.1.119.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
17
+ metacountregressor-0.1.119.dist-info/METADATA,sha256=Kya6i22baCjmfCkzW-1VnaAjzinJS7iI15R05qWr5Z8,23415
18
+ metacountregressor-0.1.119.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
19
+ metacountregressor-0.1.119.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
20
+ metacountregressor-0.1.119.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.1.0)
2
+ Generator: setuptools (75.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5