metacountregressor 0.1.113__py3-none-any.whl → 0.1.119__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -69,15 +69,20 @@ def main(args, **kwargs):
69
69
  #data_info['data']['Group'][0]
70
70
  #data_info['data']['Panel'][0]
71
71
  args['decisions'] = data_info['analyst']
72
-
73
- if not np.isnan(data_info['data']['Grouped'][0]):
72
+ grouped_c = data_info['data']['Grouped'][0]
73
+ if isinstance(data_info['data']['Grouped'][0],str):
74
74
  args['group'] = data_info['data']['Grouped'][0]
75
- args['ID'] = data_info['data']['Grouped'][0]
76
- if not np.isnan(data_info['data']['Panel'][0]):
75
+ args['ID'] = data_info['data']['Panel'][0]
76
+ if isinstance(data_info['data']['Panel'][0],str):
77
77
  args['panels'] = data_info['data']['Panel'][0]
78
78
 
79
79
  df = pd.read_csv(str(data_info['data']['Problem'][0]))
80
80
  x_df = df.drop(columns=[data_info['data']['Y'][0]])
81
+ # drop the string-typed (object dtype) columns of x_df, but keep the grouping column named in args['group']
82
+ exclude_column = args['group']
83
+ columns_to_keep = x_df.dtypes != 'object'
84
+ columns_to_keep |= (x_df.columns == exclude_column)
85
+ x_df = x_df.loc[:, columns_to_keep]
81
86
  y_df = df[[data_info['data']['Y'][0]]]
82
87
  y_df.rename(columns={data_info['data']['Y'][0]: "Y"}, inplace=True)
83
88
 
@@ -41,6 +41,12 @@ def process_arguments():
41
41
  'hyper': hyper}
42
42
  return new_data
43
43
 
44
+ def process_package_argumemnts():
45
+
46
+ new_data = {}
47
+ pass
48
+
49
+
44
50
  def main(args, **kwargs):
45
51
  '''METACOUNT REGRESSOR TESTING ENVIRONMENT'''
46
52
 
@@ -162,8 +168,8 @@ def main(args, **kwargs):
162
168
  'rdm_cor_terms': [],
163
169
  'grouped_terms': [],
164
170
  'hetro_in_means': [],
165
- 'transformations': ['no', 'log', 'log', 'no', 'no', 'no', 'no'],
166
- 'dispersion': 1
171
+ 'transformations': ['no', 'log', 'no', 'no', 'no', 'no', 'no'],
172
+ 'dispersion': 0
167
173
  }
168
174
 
169
175
  keep = ['Constant', 'US', 'RSMS', 'MCV', 'RSHS', 'AADT', 'Curve50', 'Offset']
@@ -172,13 +178,27 @@ def main(args, **kwargs):
172
178
  elif dataset == 4:
173
179
  manual_fit_spec = {
174
180
  'fixed_terms': ['const', 'LOWPRE', 'GBRPM', 'FRICTION'],
175
- 'rdm_terms': ['Expose:normal', 'INTPM:normal', 'CPM:normal', 'HISNOW:normal'],
181
+ 'rdm_terms': ['EXPOSE:normal', 'INTPM:normal', 'CPM:normal', 'HISNOW:normal'],
182
+ 'rdm_cor_terms': [],
183
+ 'grouped_terms': [],
184
+ 'hetro_in_means': [],
185
+ 'transformations': ['no', 'no', 'no', 'no', 'no', 'no', 'no', 'no'],
186
+ 'dispersion': 1
187
+ }
188
+ '''
189
+ manual_fit_spec = {
190
+ 'fixed_terms': ['const', 'LOWPRE', 'GBRPM', 'FRICTION', 'EXPOSE', 'INTPM', 'CPM', 'HISNOW'],
191
+ 'rdm_terms': [],
176
192
  'rdm_cor_terms': [],
177
193
  'grouped_terms': [],
178
194
  'hetro_in_means': [],
179
195
  'transformations': ['no', 'no', 'no', 'no', 'no', 'no', 'no', 'no'],
180
196
  'dispersion': 1
181
197
  }
198
+ '''
199
+
200
+
201
+ '''
182
202
  print('overriding this delete, just want to test the NB')
183
203
  manual_fit_spec = {
184
204
  'fixed_terms': ['const'],
@@ -189,7 +209,7 @@ def main(args, **kwargs):
189
209
  'transformations': ['no'],
190
210
  'dispersion': 1
191
211
  }
192
-
212
+ '''
193
213
  df = pd.read_csv('./data/Ex-16-3.csv') # read in the data
194
214
  y_df = df[['FREQ']].copy() # only consider crashes
195
215
  y_df.rename(columns={"FREQ": "Y"}, inplace=True)
@@ -262,6 +282,17 @@ def main(args, **kwargs):
262
282
  x_df = helperprocess.interactions(x_df, drop_this_perc=0.8)
263
283
  x_df['county'] = group_grab
264
284
 
285
+ print('benchmark specification')
286
+ manual_fit_spec = {
287
+ 'fixed_terms': ['const', 'monthly_AADT', 'segment_length', 'speed', 'paved_shoulder', 'curve'],
288
+ 'rdm_terms': [],
289
+ 'rdm_cor_terms': [],
290
+ 'grouped_terms': ['DP01:normal', 'DX32:normal'],
291
+ 'hetro_in_means': [],
292
+ 'transformations': ['no', 'no', 'no', 'no', 'no', 'no'],
293
+ 'dispersion': 0
294
+ }
295
+
265
296
  elif dataset == 9:
266
297
  df = pd.read_csv('panel_synth.csv') # read in the data
267
298
  y_df = df[['Y']].copy() # only consider crashes
@@ -286,6 +317,8 @@ def main(args, **kwargs):
286
317
  keep = ['group', 'constant', 'element_ID']
287
318
 
288
319
  x_df = helperprocess.interactions(x_df, keep)
320
+
321
+
289
322
  else: # the dataset has been selected in the program as something else
290
323
  data_info = process_arguments()
291
324
  data_info['hyper']
@@ -442,11 +475,11 @@ if __name__ == '__main__':
442
475
  if "-algorithm" in action.option_strings:
443
476
  parser._optionals._actions[i].help = "optimization algorithm"
444
477
 
445
- override = True
478
+ override = False
446
479
  if override:
447
- print('todo turn off, in testing phase')
480
+ print('WARNING: TESTING ENVIRONMENT, TURN OFF FOR RELEASE')
448
481
  parser.add_argument('-problem_number', default='10')
449
- print('did it make it')
482
+
450
483
  if 'algorithm' not in args:
451
484
  parser.add_argument('-algorithm', type=str, default='hs',
452
485
  help='optimization algorithm')
@@ -30,7 +30,7 @@ from scipy.special import gammaln
30
30
  from sklearn.metrics import mean_absolute_error as MAE
31
31
  from sklearn.metrics import mean_squared_error as MSPE
32
32
  from statsmodels.tools.numdiff import approx_fprime, approx_hess
33
- from sklearn.preprocessing import StandardScaler
33
+ from sklearn.preprocessing import StandardScaler, MinMaxScaler
34
34
  from texttable import Texttable
35
35
 
36
36
  try:
@@ -42,7 +42,7 @@ except ImportError:
42
42
  from pareto_file import Pareto, Solution
43
43
  from data_split_helper import DataProcessor
44
44
 
45
-
45
+ from scipy import stats
46
46
  np.seterr(divide='ignore', invalid='ignore')
47
47
  warnings.simplefilter("ignore")
48
48
 
@@ -124,10 +124,11 @@ class ObjectiveFunction(object):
124
124
 
125
125
  self.reg_penalty = 0
126
126
  self.power_up_ll = False
127
-
127
+ self.nb_parma = 1
128
128
  self.bic = None
129
129
  self.other_bic = False
130
130
  self.test_flag = 1
131
+ self.no_extra_param =1 #if true, fix the dispersion parameter (it is not estimated as an extra parameter)
131
132
  if self.other_bic:
132
133
  print('change this to false latter ')
133
134
 
@@ -138,7 +139,7 @@ class ObjectiveFunction(object):
138
139
  self.verbose_safe = True
139
140
  self.please_print = kwargs.get('please_print', 0)
140
141
  self.group_halton = None
141
- self.grad_yes = False
142
+ self.grad_yes = True
142
143
  self.hess_yes = False
143
144
  self.group_halton_test = None
144
145
  self.panels = None
@@ -174,8 +175,10 @@ class ObjectiveFunction(object):
174
175
  self._panels = None
175
176
  self.is_multi = True
176
177
  self.method_ll = 'Nelder-Mead-BFGS'
178
+
177
179
  self.method_ll = 'L-BFGS-B' # alternatives 'BFGS_2', 'BFGS'
178
180
  self.method_ll = 'BFGS_2'
181
+ #self.method_ll = 'Nelder-Mead-BFGS'
179
182
  self.Keep_Fit = 2
180
183
  self.MP = 0
181
184
  # Nelder-Mead-BFGS
@@ -214,6 +217,11 @@ class ObjectiveFunction(object):
214
217
  self._maximize = False # do we maximize or minimize?
215
218
 
216
219
  x_data = sm.add_constant(x_data)
220
+ standardize_the_data = 0
221
+ if standardize_the_data:
222
+ print('we are standardize the data')
223
+ x_data = self.self_standardize_positive(x_data)
224
+
217
225
  self._input_data(x_data, y_data)
218
226
 
219
227
 
@@ -232,7 +240,7 @@ class ObjectiveFunction(object):
232
240
  if self.test_percentage == 0:
233
241
  self.is_multi = False
234
242
 
235
- if 'panels' in kwargs and not np.isnan(kwargs.get('panels')):
243
+ if 'panels' in kwargs and not (kwargs.get('panels') == None):
236
244
  self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)
237
245
 
238
246
  x_data[kwargs['group']] = x_data[kwargs['group']].astype(
@@ -275,11 +283,11 @@ class ObjectiveFunction(object):
275
283
 
276
284
  #self.n_obs = N
277
285
  self._characteristics_names = list(self._x_data.columns)
278
- self._max_group_all_means = 1
286
+ self._max_group_all_means = 2
279
287
 
280
288
  exclude_this_test = [4]
281
289
 
282
- if 'panels' in kwargs and not np.isnan(kwargs.get('panels')):
290
+ if 'panels' in kwargs and not (kwargs.get('panels') == None):
283
291
  self.panels = np.asarray(df_train[kwargs['panels']])
284
292
  self.panels_test = np.asarray(df_test[kwargs['panels']])
285
293
  self.ids = np.asarray(
@@ -295,6 +303,8 @@ class ObjectiveFunction(object):
295
303
  self.group_halton = group.copy()
296
304
  self.group_dummies = pd.get_dummies(group)
297
305
  Xnew, Ynew, panel_info = self._balance_panels(X, Y, panel)
306
+
307
+ Xnew = pd.DataFrame(Xnew, columns=X.columns)
298
308
  self.panel_info = panel_info
299
309
  self.N, self.P = panel_info.shape
300
310
  Xnew.drop(kwargs['panels'], axis=1, inplace=True)
@@ -385,7 +395,7 @@ class ObjectiveFunction(object):
385
395
 
386
396
 
387
397
 
388
- self.Ndraws = 200 # todo: change back
398
+ self.Ndraws = 1400 # todo: change back
389
399
  self.draws1 = None
390
400
  self.initial_sig = 1 # pass the test of a single model
391
401
  self.pvalue_sig_value = .1
@@ -408,7 +418,7 @@ class ObjectiveFunction(object):
408
418
  # self._transformations = ["no", "sqrt", "log", "exp", "fact", "arcsinh", 2, 3]
409
419
  self._transformations = ["no", "sqrt", "log", "arcsinh"]
410
420
  self._transformations = kwargs.get('_transformation', ["no", "sqrt", "log", 'arcsinh'])
411
-
421
+ self._transformations = kwargs.get('_transformation', ["no", "log", "sqrt", "arcsinh"])
412
422
  # self._distribution = ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal', 'lindley']
413
423
 
414
424
  self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal'])
@@ -766,6 +776,8 @@ class ObjectiveFunction(object):
766
776
  if dispersion == 0:
767
777
  return None, None
768
778
  elif dispersion == 2 or dispersion == 1:
779
+ if self.no_extra_param:
780
+ return self.nb_parma, None
769
781
  return betas[-1], None
770
782
 
771
783
  elif dispersion == 3:
@@ -817,6 +829,8 @@ class ObjectiveFunction(object):
817
829
  distro = ast.literal_eval(extra.iloc[matched_index, 7].values.tolist()[0])
818
830
  distro = self.rename_distro(distro)
819
831
  set_alpha = set_alpha+[distro]
832
+ elif col == 'const':
833
+ set_alpha = set_alpha +[['normal']]
820
834
  return set_alpha
821
835
  return [[x for x in self._distribution]] * self._characteristics
822
836
 
@@ -897,10 +911,12 @@ class ObjectiveFunction(object):
897
911
  return ([self._model_type_codes[dispersion]])
898
912
 
899
913
  def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None, obj_1=None, model_nature=None):
900
- r'''
914
+ '''
901
915
  setup for naming of the model summary
902
916
  '''
917
+ if self.no_extra_param and dispersion ==1:
903
918
 
919
+ betas = np.append(betas, self.nb_parma)
904
920
 
905
921
  self.name_deleter = []
906
922
  group_rpm = None
@@ -1025,7 +1041,7 @@ class ObjectiveFunction(object):
1025
1041
  try:
1026
1042
  if len(betas) != len(names):
1027
1043
  print('no draws is', no_draws)
1028
- print('fix_theano')
1044
+
1029
1045
  except Exception as e:
1030
1046
  print(e)
1031
1047
 
@@ -1052,7 +1068,8 @@ class ObjectiveFunction(object):
1052
1068
  if not isinstance(self.pvalues, np.ndarray):
1053
1069
  raise Exception
1054
1070
 
1055
-
1071
+ if 'nb' in self.coeff_names and self.no_extra_param:
1072
+ self.pvalues = np.append(self.pvalues,0)
1056
1073
 
1057
1074
  if self.please_print or save_state:
1058
1075
 
@@ -1068,17 +1085,22 @@ class ObjectiveFunction(object):
1068
1085
 
1069
1086
  if solution is not None:
1070
1087
  print(f"{self._obj_2}: {self.round_with_padding(solution[self._obj_2], 2)}")
1071
-
1088
+
1072
1089
  self.pvalues = [self.round_with_padding(
1073
1090
  x, 2) for x in self.pvalues]
1074
1091
  signif_list = self.pvalue_asterix_add(self.pvalues)
1075
1092
  if model == 1:
1076
1093
 
1077
- self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
1078
- if self.coeff_[-1] < 0.25:
1094
+ #self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
1095
+ if self.no_extra_param:
1096
+ self.coeff_ = np.append(self.coeff_, self.nb_parma)
1097
+ self.stderr = np.append(self.stderr, 0.00001)
1098
+ self.zvalues = np.append(self.zvalues, 50)
1099
+
1100
+ elif self.coeff_[-1] < 0.25:
1079
1101
  print(self.coeff_[-1], 'Warning Check Dispersion')
1080
1102
  print(np.exp(self.coeff_[-1]))
1081
- self.coeff_[-1] = np.exp(self.coeff_[-1]) # min possible value for negbinom
1103
+ #self.coeff_[-1] = np.exp(self.coeff_[-1]) # min possible value for negbinom
1082
1104
 
1083
1105
  self.coeff_ = [self.round_with_padding(x, 2) for x in self.coeff_]
1084
1106
 
@@ -1301,6 +1323,7 @@ class ObjectiveFunction(object):
1301
1323
 
1302
1324
  if 'AADT' in self._characteristics_names[col]:
1303
1325
  new_transform = [['log']]
1326
+ #new_transform = [['no']]
1304
1327
  transform_set = transform_set + new_transform
1305
1328
 
1306
1329
  elif all(x_data[col] <= 5):
@@ -1340,6 +1363,18 @@ class ObjectiveFunction(object):
1340
1363
 
1341
1364
  return transform_set
1342
1365
 
1366
+ def poisson_mean_get_dispersion(self, betas, X, y):
1367
+ eVy = self._loglik_gradient(betas, X, y, None, X, None, None, False, False, dispersion=0,
1368
+ return_EV=True,
1369
+ zi_list=None, draws_grouped=None, Xgroup=None)
1370
+
1371
+ ab = ((y - eVy)**2 - eVy)/eVy
1372
+ bb = eVy -1
1373
+ disp = sm.OLS(ab.ravel(), bb.ravel()).fit()
1374
+ gamma = disp.params[0]
1375
+ print(f'dispersion is {gamma}')
1376
+ return gamma
1377
+
1343
1378
  def validation(self, betas, y, X, Xr=None, dispersion=0, rdm_cor_fit=None, zi_list=None, exog_infl=None,
1344
1379
  model_nature=None, halton=1, testing=1, validation=0):
1345
1380
  'validation if mu needs to be calculated'
@@ -1373,7 +1408,7 @@ class ObjectiveFunction(object):
1373
1408
  XG = model_nature.get('XGtest')[:total_percent, :, :]
1374
1409
  else:
1375
1410
  XG = model_nature.get('XGtest')[total_percent:, :, :]
1376
- print('chekc this is doing it wright')
1411
+
1377
1412
  else:
1378
1413
  if 'XG' in model_nature:
1379
1414
  XG = model_nature.get('XG')
@@ -1495,7 +1530,7 @@ class ObjectiveFunction(object):
1495
1530
  5: heterogeneity_in_means
1496
1531
 
1497
1532
 
1498
- a: how to transofrm the original data
1533
+ a: how to transform the original data
1499
1534
  b: grab dispersion '''
1500
1535
 
1501
1536
  # todo: better way
@@ -1843,7 +1878,10 @@ class ObjectiveFunction(object):
1843
1878
  elif dispersion == 4:
1844
1879
  return 2
1845
1880
  else:
1846
- return 1
1881
+ if self.no_extra_param:
1882
+ return 0
1883
+ else:
1884
+ return 1
1847
1885
 
1848
1886
  def get_pvalue_info_alt(self, pvalues, names, sig_value=0.05, dispersion=0, is_halton=1, delete=0,
1849
1887
  return_violated_terms=0):
@@ -1858,6 +1896,7 @@ class ObjectiveFunction(object):
1858
1896
 
1859
1897
  else:
1860
1898
  slice_this_amount = self.num_dispersion_params(dispersion)
1899
+ slice_this_amount = 1 #TODO handle this
1861
1900
  if pvalues[-1] > sig_value:
1862
1901
  vio_counts += 1
1863
1902
  subpvalues = pvalues[:-slice_this_amount].copy()
@@ -3502,21 +3541,38 @@ class ObjectiveFunction(object):
3502
3541
  # if gamma <= 0.01: #min defined value for stable nb
3503
3542
  # gamma = 0.01
3504
3543
 
3544
+ #g = stats.gamma.rvs(gamma, scale = lam/gamma, size = 1.0 / gamma * lam ** Q )
3505
3545
 
3546
+ #gg = stats.poisson.rvs(g)
3506
3547
 
3548
+
3507
3549
 
3550
+
3508
3551
  endog = y
3509
3552
  mu = lam
3553
+ ''''
3554
+ mu = lam*np.exp(gamma) #TODO check that this does not need to be multiplied
3510
3555
  alpha = np.exp(gamma)
3511
- #size = 1.0 / alpha * mu ** Q
3512
- alpha_size = alpha * mu ** Q
3513
- # prob = size/(size+mu)
3514
- prob = alpha / (alpha + mu)
3515
- # prob = 1/(1+mu*alpha)
3556
+
3557
+ '''
3558
+ alpha = gamma
3559
+ size = 1.0 / alpha * mu ** Q
3560
+
3561
+ prob = size/(size+mu)
3562
+
3563
+
3516
3564
 
3517
3565
  '''test'''
3518
3566
 
3519
3567
 
3568
+ '''
3569
+ size = 1 / np.exp(gamma) * mu ** 0
3570
+ prob = size / (size + mu)
3571
+ coeff = (gammaln(size + y) - gammaln(y + 1) -
3572
+ gammaln(size))
3573
+ llf = coeff + size * np.log(prob) + y * np.log(1 - prob)
3574
+ '''
3575
+
3520
3576
  try:
3521
3577
  # print(np.shape(y),np.shape(size), np.shape(prob))
3522
3578
  #gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)
@@ -3528,22 +3584,28 @@ class ObjectiveFunction(object):
3528
3584
  #start_time = time.time()
3529
3585
  #for _ in range(10000):
3530
3586
 
3531
- #gg = self.negbinom_pmf(alpha_size, prob, y)
3587
+
3532
3588
  #end_time = time.time()
3533
3589
  #print("Custom functieon time:", end_time - start_time)
3534
3590
  #start_time = time.time()
3535
3591
  #for _ in range(10000):
3592
+ '''
3536
3593
  gg = np.exp(
3537
3594
  gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha) + y * np.log(mu) + alpha * np.log(alpha) - (
3538
3595
  y + alpha) * np.log(mu + alpha))
3539
3596
  gg[np.isnan(gg)] = 1
3597
+ '''
3598
+ gg_alt = nbinom.pmf(y ,1/alpha, prob)
3599
+ #gg_alt_2 = (gammaln(size + y) - gammaln(y + 1) -
3600
+ #gammaln(size)) + size * np.log(prob) + y * np.log(1 - prob)
3601
+ #print('check theses')
3540
3602
  #gg = nbinom.pmf(y ,alpha, prob)
3541
3603
  #end_time = time.time()
3542
3604
  #print("Custom functieon time:", end_time - start_time)
3543
3605
 
3544
3606
  except Exception as e:
3545
3607
  print(e)
3546
- return gg
3608
+ return gg_alt
3547
3609
 
3548
3610
  def lindley_pmf(self, x, r, theta, k=50):
3549
3611
  """
@@ -3690,8 +3752,8 @@ class ObjectiveFunction(object):
3690
3752
 
3691
3753
  if dispersion == 1 or dispersion == 4: # nb
3692
3754
  # if model_nature is not None and 'dispersion_penalty' in model_nature:
3693
-
3694
-
3755
+ #b_gam = 1/np.exp(b_gam)
3756
+ #print(b_gam)
3695
3757
  if b_gam <= 0:
3696
3758
  #penalty += 100
3697
3759
  #penalty += abs(b_gam)
@@ -3699,9 +3761,9 @@ class ObjectiveFunction(object):
3699
3761
  #b_gam = 1
3700
3762
 
3701
3763
  # if b_gam < 0.03:
3702
- penalty += min(1, np.abs(b_gam))
3764
+ penalty += min(1, np.abs(b_gam), 0)
3703
3765
 
3704
- b_gam = 0.001
3766
+ #b_gam = 0.001
3705
3767
  #
3706
3768
 
3707
3769
  #if b_gam >= 10:
@@ -3733,8 +3795,15 @@ class ObjectiveFunction(object):
3733
3795
  def eXB_calc(self, params_main, Xd, offset, dispersion, b_gam=None):
3734
3796
 
3735
3797
  # print('this was 0')
3736
- eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
3798
+ if dispersion:
3799
+ eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
3800
+
3801
+ #eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])+dispersion
3802
+ #print('check if this holds size')
3803
+ else:
3804
+ eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
3737
3805
  eta = np.array(eta)
3806
+
3738
3807
  # eta = np.float64(eta)
3739
3808
  # eta = np.dot(Xd, params_main)+offset[:,:,0]
3740
3809
  # eta2 = np.dot(Xd, params_main)[:,:,None]+np.array(offset[:,:,:])
@@ -3907,6 +3976,8 @@ class ObjectiveFunction(object):
3907
3976
  if dispersion == 0 or dispersion == 3:
3908
3977
  return 0
3909
3978
  else:
3979
+
3980
+
3910
3981
  return 1
3911
3982
 
3912
3983
  def _prob_product_across_panels(self, pch, panel_info):
@@ -3962,7 +4033,7 @@ class ObjectiveFunction(object):
3962
4033
  if y[i] == 0:
3963
4034
  gr_e[i] = 0
3964
4035
 
3965
- if self.is_dispersion(dispersion):
4036
+ if self.is_dispersion(dispersion) and not self.no_extra_param:
3966
4037
  gr_d = np.zeros((N, 1))
3967
4038
  if dispersion == 1:
3968
4039
  # trying alt
@@ -4067,9 +4138,9 @@ class ObjectiveFunction(object):
4067
4138
  dprod_r = dev.np.einsum("njk,njr -> nkr", Xdr,
4068
4139
  einsum_model_form, dtype=np.float64) # (N,K,R)
4069
4140
  der_prod_r = dprod_r * der * proba_n[:, None, :] # (N,K,R)
4070
- der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :] # I think this is the case check
4141
+ #der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :] # I think this is the case check
4071
4142
  der_prod_r = dprod_r[:, X_tril_idx, :] * der * proba_n[:, None, :] # or this one
4072
- print('which one of these')
4143
+ #print('which one of these')
4073
4144
  der_t = self._compute_derivatives(
4074
4145
  br, draws_[:, draws_tril_idx, :], brstd, self.dist_fit) # (N,K,R)
4075
4146
  # er_t = self._compute_derivatives(br, draws_, brstd[:, draws_tril_idx,: ], self.dist_fit, draws_tril_idx)
@@ -4132,8 +4203,12 @@ class ObjectiveFunction(object):
4132
4203
  grad_n = self._concat_gradients(
4133
4204
  (gr_f, gr_u, gr_s, gr_e)) / Rlik # (N,K)
4134
4205
  else:
4135
- grad_n = self._concat_gradients(
4136
- (gr_f, gr_u, gr_s, gr_h, gr_hs, gr_d[:, None])) / Rlik # (N,K)
4206
+ if self.no_extra_param:
4207
+ grad_n = self._concat_gradients(
4208
+ (gr_f, gr_u, gr_s, gr_h, gr_hs)) / Rlik # (N,K)
4209
+ else:
4210
+ grad_n = self._concat_gradients(
4211
+ (gr_f, gr_u, gr_s, gr_h, gr_hs, gr_d[:, None])) / Rlik # (N,K)
4137
4212
  grad_n = np.nan_to_num(grad_n, nan=0, posinf=10000, neginf=-10000)
4138
4213
  grad_n = np.clip(grad_n, -1000, 1000)
4139
4214
  n = np.shape(grad_n)[0]
@@ -4290,7 +4365,7 @@ class ObjectiveFunction(object):
4290
4365
  return proba_r.sum(axis=1), np.squeeze(proba_r)
4291
4366
 
4292
4367
  def _penalty_betas(self, betas, dispersion, penalty, penalty_ap=100.0):
4293
- penalty_val = 0.05
4368
+ penalty_val = 0.1
4294
4369
  penalty_val_max = 130
4295
4370
 
4296
4371
  # print('change_later')
@@ -4306,8 +4381,8 @@ class ObjectiveFunction(object):
4306
4381
  if abs(i) > penalty_val_max:
4307
4382
  penalty += abs(i)
4308
4383
 
4309
- # if abs(i) < penalty_val:
4310
- # penalty += 5
4384
+ #if abs(i) < penalty_val:
4385
+ # penalty += 5
4311
4386
 
4312
4387
  # penalty = 0
4313
4388
  return penalty
@@ -4414,8 +4489,7 @@ class ObjectiveFunction(object):
4414
4489
  index += 1
4415
4490
 
4416
4491
  brstd = br_std
4417
- print(brstd)
4418
- print(brstd)
4492
+
4419
4493
 
4420
4494
 
4421
4495
  def _loglik_gradient(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False,
@@ -4447,7 +4521,7 @@ class ObjectiveFunction(object):
4447
4521
  penalty = self._penalty_betas(
4448
4522
  betas, dispersion, penalty, float(len(y) / 10.0))
4449
4523
  self.n_obs = len(y) # feeds into gradient
4450
- if draws is None and draws_grouped is None and (
4524
+ if draws is None and draws_grouped is None and (model_nature is None or
4451
4525
  'draws_hetro' not in model_nature or model_nature.get('draws_hetro').shape[1] == 0):
4452
4526
  #TODO do i shuffle the draws
4453
4527
  if type(Xd) == dict:
@@ -4594,7 +4668,9 @@ class ObjectiveFunction(object):
4594
4668
  Kf = 0
4595
4669
  else:
4596
4670
  if n_coeff != len(betas):
4597
- raise Exception
4671
+ raise Exception(
4672
+
4673
+ )
4598
4674
  Bf = betas[0:Kf] # Fixed betas
4599
4675
 
4600
4676
 
@@ -4696,7 +4772,8 @@ class ObjectiveFunction(object):
4696
4772
  eVd = self.lam_transform(eVd, dispersion, betas[-1])
4697
4773
 
4698
4774
  if self.is_dispersion(dispersion):
4699
- penalty, betas[-1] = self._penalty_dispersion(
4775
+ if not self.no_extra_param:
4776
+ penalty, betas[-1] = self._penalty_dispersion(
4700
4777
  dispersion, betas[-1], eVd, y, penalty, model_nature)
4701
4778
 
4702
4779
  '''
@@ -5341,7 +5418,7 @@ class ObjectiveFunction(object):
5341
5418
  return a
5342
5419
 
5343
5420
  def fitRegression(self, mod,
5344
- dispersion=0, maxiter=2000, batch_size=None, num_hess=False):
5421
+ dispersion=0, maxiter=2000, batch_size=None, num_hess=False, **kwargs):
5345
5422
 
5346
5423
  """
5347
5424
  Fits a poisson regression given data and outcomes if dispersion is not declared
@@ -5387,6 +5464,8 @@ class ObjectiveFunction(object):
5387
5464
  _g, pg, kg = 0, 0, 0
5388
5465
 
5389
5466
  dispersion_param_num = self.is_dispersion(dispersion)
5467
+ if self.no_extra_param:
5468
+ dispersion_param_num =0
5390
5469
 
5391
5470
  #paramNum = self.get_param_num(dispersion)
5392
5471
  self.no_random_paramaters = 0
@@ -5441,17 +5520,26 @@ class ObjectiveFunction(object):
5441
5520
  else:
5442
5521
  bb[0] = self.constant_value
5443
5522
  if dispersion == 1:
5444
- bb[-1] = self.negative_binomial_value
5523
+ if not self.no_extra_param:
5524
+ bb[-1] = self.negative_binomial_value
5445
5525
  bounds = None
5446
5526
 
5527
+
5528
+
5447
5529
  # intial_beta = minimize(self._loglik_gradient, bb, args =(XX, y, None, None, None, None, calc_gradient, hess_est, dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod), method = 'nelder-mead', options={'gtol': 1e-7*len(XX)})
5448
5530
  hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
5449
- initial_beta = self._minimize(self._loglik_gradient, bb,
5531
+
5532
+ if self.no_extra_param:
5533
+ dispersion_poisson = 0
5534
+ initial_beta = self._minimize(self._loglik_gradient, bb,
5450
5535
  args=(XX, y, None, None, None, None, calc_gradient, hess_est,
5451
- dispersion, 0, False, 0, None, None, None, None, None,
5536
+ dispersion_poisson, 0, False, 0, None, None, None, None, None,
5452
5537
  mod),
5453
5538
  method=method2, tol=1e-5, options={'gtol': tol['gtol']},
5454
5539
  bounds=bounds)
5540
+ if dispersion:
5541
+ nb_parma = self.poisson_mean_get_dispersion(initial_beta.x, XX, y)
5542
+
5455
5543
 
5456
5544
 
5457
5545
 
@@ -5551,7 +5639,7 @@ class ObjectiveFunction(object):
5551
5639
 
5552
5640
  b = [b[i] if i > len(self.none_handler(self.fixed_fit)) + len(
5553
5641
  self.none_handler(self.rdm_fit)) + len(
5554
- self.none_handler(self.rdm_cor_fit)) else b[i] / 1.3 for i in range(len(b))]
5642
+ self.none_handler(self.rdm_cor_fit)) else b[i] / 1 for i in range(len(b))]
5555
5643
  else:
5556
5644
  b = bb
5557
5645
 
@@ -5561,9 +5649,10 @@ class ObjectiveFunction(object):
5561
5649
  else:
5562
5650
  b = np.insert(b, -1, np.random.uniform(0.05, 0.1))
5563
5651
  if dispersion == 1:
5564
- b[-1] = np.abs(b[-1])
5565
- if b[-1] > 10:
5566
- b[-1] = 5
5652
+ if not self.no_extra_param:
5653
+ b[-1] = np.abs(b[-1])
5654
+ if b[-1] > 10:
5655
+ b[-1] = 5
5567
5656
  elif dispersion == 2:
5568
5657
  b[-1] = .5
5569
5658
  if method == 'L-BFGS-B' or method2 == 'L-BFGS-B':
@@ -5689,13 +5778,30 @@ class ObjectiveFunction(object):
5689
5778
 
5690
5779
  if draws is None and draws_hetro is not None:
5691
5780
  print('hold')
5692
- betas_est = self._minimize(self._loglik_gradient, b, args=(
5693
- X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
5694
- self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
5695
- method=method2, tol=tol['ftol'],
5696
- options={'gtol': tol['gtol']}, bounds=bounds,
5697
- hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
5781
+ #self.grad_yes = True
5782
+ #self.hess_yes = True
5698
5783
 
5784
+ if self.no_extra_param:
5785
+ dispersion_poisson = 0
5786
+ betas_est = self._minimize(self._loglik_gradient, b, args=(
5787
+ X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion_poisson, 0, False, 0,
5788
+ self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
5789
+ method=method2, tol=tol['ftol'],
5790
+ options={'gtol': tol['gtol']}, bounds=bounds,
5791
+ hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
5792
+ if dispersion:
5793
+ initial_fit_beta = betas_est.x
5794
+ parmas = np.append(initial_fit_beta, nb_parma)
5795
+ self.nb_parma = nb_parma
5796
+ print(f'neg binomi,{self.nb_parma}')
5797
+ betas_est = self._minimize(self._loglik_gradient, initial_fit_beta, args=(
5798
+ X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
5799
+ self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
5800
+ method=method2, tol=tol['ftol'],
5801
+ options={'gtol': tol['gtol']}, bounds=bounds,
5802
+ hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
5803
+
5804
+ print('refit with estimation of NB')
5699
5805
  # self.numerical_hessian_calc = True
5700
5806
  if self.numerical_hessian_calc:
5701
5807
  try:
@@ -5994,7 +6100,7 @@ class ObjectiveFunction(object):
5994
6100
  return delim + self._model_type_codes[dispersion]
5995
6101
 
5996
6102
  def self_standardize_positive(self, X):
5997
- scaler = StandardScaler()
6103
+ scaler = MinMaxScaler()
5998
6104
  if type(X) == list:
5999
6105
  return X
6000
6106
 
@@ -6004,12 +6110,26 @@ class ObjectiveFunction(object):
6004
6110
  # Reshaping to 2D - combining the last two dimensions
6005
6111
  df_tf_reshaped = X.reshape(original_shape[0], -1)
6006
6112
  df_tf_scaled = scaler.fit_transform(df_tf_reshaped)
6007
- df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
6113
+ #df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
6008
6114
  # Reshape back to original 3D shape if necessary
6009
6115
  df_tf = df_tf_scaled.reshape(original_shape)
6010
6116
  return df_tf
6011
6117
  else:
6012
- raise X
6118
+ # Initialize the MinMaxScaler
6119
+ scaler = MinMaxScaler()
6120
+ float_columns = X.select_dtypes(include=['float64', 'float32', 'int']).columns.difference(['const', 'offset, "EXPOSE', 'Constant', 'constant'])
6121
+ non_numeric_columns = X.select_dtypes(exclude=['float64', 'float32', 'int']).columns
6122
+
6123
+ # Fit the scaler to the float columns and transform them
6124
+ X[float_columns] = scaler.fit_transform(X[float_columns])
6125
+ # Fit the scaler to the data and transform it
6126
+ #scaled_data = scaler.fit_transform(X)
6127
+
6128
+ # Convert the result back to a DataFrame
6129
+ #scaled_df = pd.DataFrame(scaled_data, columns=X.columns)
6130
+
6131
+
6132
+ return X
6013
6133
 
6014
6134
  def make_regression_from_terms(self, fixed=None, rdm=None, rdm_cor_fit=None, distribution=None, dispersion=None,
6015
6135
  *args, **kwargs):
@@ -6065,7 +6185,7 @@ class ObjectiveFunction(object):
6065
6185
  t, idx, df_test[:, :, idx])
6066
6186
  if np.max(df_tf[:, :, idx]) >= 77000:
6067
6187
 
6068
- raise Exception('should not be possible')
6188
+ print('should not be possible')
6069
6189
 
6070
6190
  self.define_selfs_fixed_rdm_cor(model_nature)
6071
6191
  indices = self.get_named_indices(self.fixed_fit)
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: metacountregressor
3
- Version: 0.1.113
3
+ Version: 0.1.119
4
4
  Summary: Extensions for a Python package for estimation of count models.
5
5
  Home-page: https://github.com/zahern/CountDataEstimation
6
6
  Author: Zeke Ahern
@@ -9,9 +9,18 @@ License: QUT
9
9
  Requires-Python: >=3.10
10
10
  Description-Content-Type: text/markdown
11
11
  License-File: LICENSE.txt
12
- Requires-Dist: numpy >=1.13.1
13
- Requires-Dist: scipy >=1.0.0
12
+ Requires-Dist: numpy>=1.13.1
13
+ Requires-Dist: scipy>=1.0.0
14
14
  Requires-Dist: requests
15
+ Dynamic: author
16
+ Dynamic: author-email
17
+ Dynamic: description
18
+ Dynamic: description-content-type
19
+ Dynamic: home-page
20
+ Dynamic: license
21
+ Dynamic: requires-dist
22
+ Dynamic: requires-python
23
+ Dynamic: summary
15
24
 
16
25
  <div style="display: flex; align-items: center;">
17
26
  <img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width: 100px; margin-right: 20px;">
@@ -1,20 +1,20 @@
1
1
  metacountregressor/__init__.py,sha256=UM4zaqoAcZVWyx3SeL9bRS8xpQ_iLZU9fIIARWmfjis,2937
2
2
  metacountregressor/_device_cust.py,sha256=759fnKmTYccJm4Lpi9_1reurh6OB9d6q9soPR0PltKc,2047
3
- metacountregressor/app_main.py,sha256=vdI_b_0GMYWMIUd3iN-Cak-xukM8uxNhaUggUl5SKPs,9695
3
+ metacountregressor/app_main.py,sha256=vY3GczTbGbBRalbzMkl_9jVW7RMgEOc6z2Dr1IZJv9c,10014
4
4
  metacountregressor/data_split_helper.py,sha256=M2fIMdIO8znUaYhx5wlacRyNWdQjNYu1z1wkE-kFUYU,3373
5
5
  metacountregressor/halton.py,sha256=jhovA45UBoZYU9g-hl6Lb2sBIx_ZBTNdPrpgkzR9fng,9463
6
6
  metacountregressor/helperprocess.py,sha256=Sc5gJ7ffFlkya5B5KQwE33xxXuIQyF6OaYtSikLa3pQ,12968
7
- metacountregressor/main.py,sha256=3prcbUriyMRqpIUsW4dt-X-qLV5mLGI4jA2uh9ZQDRM,19638
7
+ metacountregressor/main.py,sha256=lHrj2Hvj2hTGi-2mLSbuGEHDDILl4V-ml9e9Y_Hjpts,20560
8
8
  metacountregressor/main_old.py,sha256=eTS4ygq27MnU-dZ_j983Ucb-D5XfbVF8OJQK2hVVLZc,24123
9
9
  metacountregressor/metaheuristics.py,sha256=UyUmHyuQd5vZ2wdaVL0dWpJfBOBdtCAqcA0GlFzouH8,105849
10
10
  metacountregressor/pareto_file.py,sha256=whySaoPAUWYjyI8zo0hwAOa3rFk6SIUlHSpqZiLur0k,23096
11
11
  metacountregressor/pareto_logger__plot.py,sha256=mEU2QN4wmsM7t39GJ_XhJ_jjsdl09JOmG0U2jICrAkI,30037
12
12
  metacountregressor/setup.py,sha256=8w6IqX0tJsbYrOI1BJLIJCIvOnunKli5I9fsF5PhHv4,919
13
13
  metacountregressor/single_objective_finder.py,sha256=jVG7GJBqzSP4_riYr-kMMKy_LE3SlGmKMunNhHYxgRg,8011
14
- metacountregressor/solution.py,sha256=SzI3JItc7CzVcJOAOiztQJD0WMRaE0pw1ps55kP90tc,268893
14
+ metacountregressor/solution.py,sha256=4pfpDcH38eTIGV7DIDL5ZOyB_ND-iDzod964-RBhC5o,274195
15
15
  metacountregressor/test_generated_paper2.py,sha256=pwOoRzl1jJIIOUAAvbkT6HmmTQ81mwpsshn9SLdKOg8,3927
16
- metacountregressor-0.1.113.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
17
- metacountregressor-0.1.113.dist-info/METADATA,sha256=402HrmFmSlQ904TSzP2i8H7Ymx_hurLEpI8qhNTkvJk,23223
18
- metacountregressor-0.1.113.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
19
- metacountregressor-0.1.113.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
20
- metacountregressor-0.1.113.dist-info/RECORD,,
16
+ metacountregressor-0.1.119.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
17
+ metacountregressor-0.1.119.dist-info/METADATA,sha256=Kya6i22baCjmfCkzW-1VnaAjzinJS7iI15R05qWr5Z8,23415
18
+ metacountregressor-0.1.119.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
19
+ metacountregressor-0.1.119.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
20
+ metacountregressor-0.1.119.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.1.0)
2
+ Generator: setuptools (75.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5