metacountregressor 0.1.48__py3-none-any.whl → 0.1.50__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,105 @@
-from .pareto_file import _pareto
-from .pareto_file import _solution
-from ._device_cust import device
+import warnings
+import argparse
+import csv
+import faulthandler
+import sys
+import timeit
+from collections import namedtuple
+print('loaded standard packages')
+
+import numpy as np
+
+import pandas as pd
+
+from helperprocess import *
+print('loaded helper')
+from .metaheuristics import (differential_evolution,
+                             harmony_search,
+                             simulated_annealing)
 from .solution import ObjectiveFunction
-from .helperprocess import *
-from .metaheuristics import *
 
 
+
+
+
+
+
+
+import pandas as pd
+df = pd.read_csv("https://raw.githubusercontent.com/zahern/data/main/Ex-16-3.csv")
+
+
+y = df['FREQ']  # Frequency of crashes
+X = df.drop(columns=['FREQ', 'ID'])  # Drop Y, and ID as there are no panels
+X = pd.get_dummies(X, columns=['FC'], prefix=['FC'], prefix_sep='_').astype(int)
+X['Offset'] = np.log(1 + X['AADT'] * X['LENGTH'] * 365 / 100000000)
+#X = interactions(X)
+#X = pd.get_dummies(X, columns=['FC'], prefix=['FC'], prefix_sep='_')
+
+
+# Fit the model with metacountregressor
+
+other_data = 1
+if other_data:
+    df = pd.read_csv('panel_synth.csv')  # read in the data
+    y = df[['Y']].copy()  # only consider crashes
+    y.rename(columns={"crashes": "Y"}, inplace=True)
+    panels = df['ind_id']
+
+    X = df.drop(columns=['Y', 'alt'])
+    # Model Decisions, Specify for Initial Optimization
+    manual_fit_spec = {
+        'fixed_terms': ['added_fixed1', 'added_fixed2', 'added_fixed3', 'constant'],
+        'rdm_terms': [],
+        'rdm_cor_terms': ['added_random1:grpd| normal', 'added_random2:grpd| uniform', 'added_random3:grpd| triangular'],
+        'grouped_terms': [],
+        'hetro_in_means': [],
+        'transformations': ['no', 'no', 'no', 'no', 'no', 'no', 'no'],
+        'dispersion': 0
+    }
+    arguments = dict()
+    arguments['group'] = 'group'
+    arguments['panels'] = 'ind_id'
+    arguments['ID'] = 'ind_id'
+else:
+    # Model Decisions, Specify for Initial Optimization
+    manual_fit_spec = {
+        'fixed_terms': ['const', 'FC_2'],
+        'rdm_terms': ['MXGRADE:triangular', 'AVEPRE:normal'],
+        'rdm_cor_terms': [],
+        'grouped_terms': [],
+        'hetro_in_means': ['URB:triangular', 'ACCESS:triangular', 'FC_1:triangular'],
+        'transformations': ['no', 'no', 'no', 'no', 'no', 'no', 'no'],
+        'dispersion': 0
+    }
+
+
+
+
+#select one of the algorithms
+alg = [harmony_search, differential_evolution, simulated_annealing]
+alg = alg[0]  #harmony search
+
+
+
+
+
+
+#Search Arguments
+arguments = {
+    'algorithm': 'hs',
+    'test_percentage': 0.2,
+    'test_complexity': 6,
+    'instance_number': 'name',
+    'Manual_Fit': manual_fit_spec
+}
+
+arguments['group'] = 'group'
+arguments['panels'] = 'ind_id'
+arguments['ID'] = 'ind_id'
+
+
+arguments_hyperparamaters = dict()
+
+# end default constructor
+obj_fun = ObjectiveFunction(X, y, **arguments)
+results = alg(obj_fun, None, **arguments_hyperparamaters)
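The offset line in this new demonstration script builds a log-exposure term: AADT (vehicles/day) times LENGTH times 365 gives annual vehicle-miles (assuming LENGTH is in miles), and dividing by 1e8 rescales to the 100-million-vehicle-miles unit common in crash modelling. A tiny standalone sketch of just that step; the two-row data frame is made up for illustration:

```python
import numpy as np
import pandas as pd

seg = pd.DataFrame({'AADT': [12000, 3500], 'LENGTH': [2.1, 0.4]})
# log(1 + exposure): the +1 keeps the log finite when exposure is near zero
seg['Offset'] = np.log(1 + seg['AADT'] * seg['LENGTH'] * 365 / 100000000)
print(seg)
```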
@@ -7,7 +7,8 @@ if _gpu_available:
     import cupy
     _gpu_available = True
 except ImportError:
-    gpu_available = False
+    print('CuPy was not loaded')
+    _gpu_available = False
     pass
 class Device():
     def __init__(self):
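Note that this hunk also fixes a latent bug: the old `except` branch assigned `gpu_available` (no leading underscore), so the module-level `_gpu_available` flag was never reset when CuPy was missing. The guarded-import pattern in isolation looks like this (a minimal sketch; the NumPy fallback alias is an illustrative addition, not part of `_device_cust.py`):

```python
# Guarded optional-GPU import: prefer CuPy, fall back to NumPy, and record
# availability in a single flag the rest of the module can consult.
try:
    import cupy as xp
    _gpu_available = True
except ImportError:
    print('CuPy was not loaded')
    import numpy as xp
    _gpu_available = False
```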
Binary file
@@ -41,6 +41,7 @@ from statsmodels.tools.numdiff import approx_fprime, approx_hess
 from sklearn.preprocessing import StandardScaler
 #from tabulate import tabulate
 from texttable import Texttable
+#from optimparallel import minimize_parallel
 
 from ._device_cust import device as dev
 #from optimparallel import minimize_parallel
@@ -134,6 +135,14 @@ class ObjectiveFunction(object):
         if self.other_bic:
             print('change this to false later')
         offset = None
+
+        # init
+        self.constant_value = -5.5
+        self.negative_binomial_value = 0.05
+
+        self.verbose_safe = True
+        self.zi_force = None  # analyst wants a zi model and formally declares the zi components below
+        self.zi_force_names = None  # declare the zi components
         self.please_print = 1
         self.group_halton = None
         self.grad_yes = False
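These new `zi_force` attributes thread a zero-inflation option through the estimator (see the later hunks touching `acceptable_keys_list` and `exog_inflX`). A hypothetical sketch of how a caller would switch it on; the column names are illustrative only:

```python
# Hypothetical usage: declaring zi_force_names turns zi_force on, and 'const'
# is prepended automatically if missing (per the acceptable_keys_list hunk below).
arguments = {'algorithm': 'hs', 'zi_force_names': ['URB', 'ACCESS']}
obj_fun = ObjectiveFunction(X, y, **arguments)  # estimates an inflated (logit) component
```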
@@ -171,9 +180,11 @@ class ObjectiveFunction(object):
         self._panels = 1
         self.is_multi = True
         self.method = 'L-BFGS-B'  # alternatives 'BFGS_2', 'BFGS'
+        self.method = 'BFGS_2'
+        self.method = 'Nelder-Mead-BFGS'
+        # Nelder-Mead-BFGS
 
-
-        self._max_characteristics = 40
+        self._max_characteristics = 26
 
 
 
@@ -183,11 +194,17 @@ class ObjectiveFunction(object):
             'algorithm', '_random_seed', '_max_time',
             'forcedvariables', '_obj_1', '_obj_2', '_par',
             'Manuel_Estimate', 'test_percentage', 'is_multi', 'val_percentage'
-            'complexity_level', '_hms', '_mpai', 'group', '_max_characteristics']
+            'complexity_level', '_hms', '_mpai', 'group', '_max_characteristics', 'zi_force_names']
         for k in kwargs.keys():
             if k in acceptable_keys_list:
                 self.__setattr__(k, self.tryeval(kwargs[k]))
 
+        if self.zi_force_names is not None:
+            self.zi_force = True
+            if 'const' not in self.zi_force_names:
+                self.zi_force_names = ['const'] + self.zi_force_names
+                print('did this work?')
+
         if 'complexity_level' in kwargs:
             self.complexity_level = kwargs['complexity_level']
 
@@ -277,11 +294,23 @@ class ObjectiveFunction(object):
         test_idx = [ii for ii in range(len(id_unique)) if id_unique[ii] not in ids]
 
 
+        try:  # @IgnoreException
+            df_train = x_data.loc[train_idx, :]
+            df_test = x_data.loc[test_idx, :]
+            y_train = y_data.loc[train_idx, :]
+            y_test = y_data.loc[test_idx, :]
+        except:
+            # Convert all values to their real parts
+            df_real = x_data.select_dtypes(include=[np.number]).apply(np.real)
 
-        df_train = x_data.loc[train_idx, :]
-        df_test = x_data.loc[test_idx, :]
-        y_train = y_data.loc[train_idx, :]
-        y_test = y_data.loc[test_idx, :]
+            # Replace the original DataFrame's numerical columns with real-valued ones
+            x_data[df_real.columns] = df_real
+
+            df_train = x_data.iloc[train_idx, :]
+            df_test = x_data.iloc[test_idx, :]
+            y_train = y_data.iloc[train_idx, :]
+            y_test = y_data.iloc[test_idx, :]
+
 
 
 
@@ -290,9 +319,13 @@ class ObjectiveFunction(object):
         #self._x_data, self._x_data_test, self._y_data, self.y_data_test = train_test_split(new_data_test[data_names], y_data, test_size = self.test_percentage, random_state=self.get_random_seed())
         #data_names = self._random_forest_preprocess()
 
-
-
+
+        self.n_obs = N
         self._characteristics_names = list(self._x_data.columns)
+        if self.zi_force:
+
+            self.alpha_hurdle = np.isin(self._characteristics_names, [item.split(':')[0] for item in self.zi_force_names]).astype(int).tolist()
+            print(1)
         #self._characteristics_names = [x for x in self._characteristics_names if not 'ID' in x]
 
 
@@ -411,10 +444,7 @@ class ObjectiveFunction(object):
 
 
 
-
-
-
-
+
 
 
         self._samples, self._panels, self._characteristics = self._x_data.shape
@@ -507,7 +537,7 @@ class ObjectiveFunction(object):
         self.significant = 0
         # define the states of our explanatory variables
 
-        self._discrete_values = self.define_alphas(self.complexity_level, exclude_this_test, kwargs.get('Keep_Fit', []))
+        self._discrete_values = self.define_alphas(self.complexity_level, exclude_this_test, kwargs.get('must_include', []))
         self._discrete_values = self._discrete_values + \
             [[x for x in self._distribution]] * self._characteristics
 
@@ -516,7 +546,7 @@ class ObjectiveFunction(object):
         if 'model_types' in kwargs:
             model_types = kwargs['model_types']
         else:
-            model_types = [[0, 1]]  # add 2 for Generalized Poisson
+            model_types = [[1]]  # add 2 for Generalized Poisson
 
 
         self._discrete_values = self._discrete_values + self.define_poissible_transforms(self._transformations) + model_types
@@ -531,7 +561,7 @@ class ObjectiveFunction(object):
         # model specs
         self.endog = None
         # solution parameters
-        self._min_characteristics = 4
+        self._min_characteristics = 0
 
 
         self._max_hurdle = 4
@@ -586,13 +616,15 @@ class ObjectiveFunction(object):
                 'grouped_terms': [],
                 'hetro_in_means': [],
                 'transformations': ['no'],
-                'dispersion': i
+                'dispersion': 1
             }
             a = self.modify_initial_fit(manual_fit_spec)
             self.makeRegression(a)
-            constant_values.append(self.beta_dict['const'][0][1])
-            dispersion_values.append(self.beta_dict.get(self._model_type_codes[i], [[0, 0], [0, 0]])[0][1])
-
+            try:
+                constant_values.append(self.beta_dict['const'][0][1])
+                dispersion_values.append(self.beta_dict.get(self._model_type_codes[i], [[0, 0], [0, 0]])[0][1])
+            except:
+                print('d')
             i += 1
 
         # Add the values of this iteration to the total
@@ -602,6 +634,7 @@ class ObjectiveFunction(object):
         # Calculate the averages
         constant_values_avg = [x / 100 for x in constant_values_total]
         dispersion_values_avg = [x / 100 for x in dispersion_values_total]
+
 
 
 
@@ -654,6 +687,24 @@ class ObjectiveFunction(object):
             return np.exp(-lam) * (lam**x) / math.factorial(x) * lognorm.pdf(lam, sigma, scale=np.exp(mu))
         return np.nan_to_num(quad(integrand, 0, np.inf)[0], nan=0)
 
+
+    def _call_MAXlike(self):
+
+        import rpy2.rinterface as rinterface
+        import rpy2.robjects as robjects
+        import rpy2.robjects as ro
+        from rpy2.robjects import pandas2ri
+        r = robjects.r
+        r['source']('testMAX.R')
+        rMAX = robjects.globalenv['maxLik']
+        args = (1)
+        betas = 1
+        def loglike(p): return self._loglik_gradient(
+            p, *args)
+        loglik = ro.conversion._py2rpy(loglik)
+        rMAX(loglik, start=betas)
+        raise Exception('not yet implemented')
+
     def _random_forest_call_r(self):
         import rpy2.rinterface as rinterface
         import rpy2.robjects as robjects
@@ -960,6 +1011,7 @@ class ObjectiveFunction(object):
         zi_fit = self.none_handler(self.zi_fit)
         dis_fit = [x for x in self.none_handler(
             self.dist_fit)]  # check if dis fit is name
+
         hetro_long = []
         big_hetro = []
         if model_nature is not None:
@@ -1027,7 +1079,7 @@ class ObjectiveFunction(object):
         #br_w_names = np.char.add(randvars, "sd.")
         #br_w_names = np.char.add(br_w_names, rand_vars_dis)
         # br_w_names = br_w_names.tolist()
-        zi_names = [x for x in self.none_handler(zi_fit)]
+        zi_names = [x + ":inflated" for x in self.none_handler(self.zi_force_names)]
 
         names = fixednames + randvars + chol_names + \
             br_w_names + chol + zi_names + hetro_long + dispersion_name
@@ -1058,7 +1110,7 @@ class ObjectiveFunction(object):
         randvars = [x for x in self.none_handler(rdm_fit)]
         chol_names = [x for x in self.none_handler(rdm_cor_fit)]
 
-        zi_names = [x for x in self.none_handler(zi_fit)]
+        zi_names = [x + ': inflated' for x in self.none_handler(self.zi_force_names)]
 
         names = fixednames + randvars + chol_names + zi_names + big_hetro + dispersion_name
 
@@ -1075,7 +1127,7 @@ class ObjectiveFunction(object):
         except Exception as e:
             print(e)
 
-    def summary_alternative(self, long_print=0, model=0, solution=None, save_state=0):
+    def summary_alternative(self, long_print=0, model=0, solution=None, save_state=1):
         fmt = "{:19} {:13} {:13.10f} {:13.10f}{:13.10f} {:13.3g} {:3}"
         coeff_name_str_length = 19
 
@@ -1139,7 +1191,10 @@ class ObjectiveFunction(object):
 
         self.coeff_[-1] = np.abs(self.coeff_[-1])
         if self.coeff_[-1] < 0.25:
-            self.coeff_[-1] = .25  # min possible value for negbinom
+            print(self.coeff_[-1], 'is this why')
+            print(np.exp(self.coeff_[-1]))
+            self.coeff_[-1] = np.exp(self.coeff_[-1])  # min possible value for negbinom
+
 
         self.coeff_ = [self.round_with_padding(x, 2) for x in self.coeff_]
 
@@ -1366,7 +1421,11 @@ class ObjectiveFunction(object):
         x_data = self._x_data.copy()
         for col in x_data:
 
-            if all(x_data[col] <= 5):
+            if 'AADT' in self._characteristics_names[col]:
+                new_transform = [['log']]
+                transform_set = transform_set + new_transform
+
+            elif all(x_data[col] <= 5):
                 new_transform = [['no']]
                 transform_set = transform_set + new_transform
             elif col == "Offset":
@@ -1374,7 +1433,7 @@ class ObjectiveFunction(object):
                 transform_set = transform_set + new_transform
             else:
                 new_transform = transforms.copy()
-                if (x_data[col] > 0).all() and (x_data[col] >= 100000).any():
+                if (x_data[col] >= 0).all() and (x_data[col] >= 200).any():
                     unwanted = {'no', 2, 3, 'exp', 'fact'}
                     new_transform = [
                         ele for ele in new_transform if ele not in unwanted]
@@ -1594,17 +1653,33 @@ class ObjectiveFunction(object):
         alpha_hetro = [
             0 if x != 5 else 1 for x in vector[:self._characteristics]]
 
-        return {
-            'alpha': alpha,
-            'alpha_rdm': alpha_rdm,
-            'alpha_cor_rdm': alpha_cor_rdm,
-            'alpha_grouped': alpha_grouped,
-            'alpha_hetro': alpha_hetro,
-            'distributions': distributions,
-            'transformations': transformations,
-
-            'dispersion': dispersion
-        }
+
+        if self.zi_force == True:
+
+            return {
+                'alpha': alpha,
+                'alpha_rdm': alpha_rdm,
+                'alpha_cor_rdm': alpha_cor_rdm,
+                'alpha_grouped': alpha_grouped,
+                'alpha_hetro': alpha_hetro,
+                'distributions': distributions,
+                'transformations': transformations,
+                'exog_infl': self.zi_force_names,
+                'dispersion': dispersion
+            }
+
+        else:
+            return {
+                'alpha': alpha,
+                'alpha_rdm': alpha_rdm,
+                'alpha_cor_rdm': alpha_cor_rdm,
+                'alpha_grouped': alpha_grouped,
+                'alpha_hetro': alpha_hetro,
+                'distributions': distributions,
+                'transformations': transformations,
+
+                'dispersion': dispersion
+            }
 
     # TODO implement the interactions
 
@@ -2409,7 +2484,7 @@ class ObjectiveFunction(object):
         if self.pvalues is None:
             self.reset_sln()
             return obj_1
-        print(1)
+
 
 
         sub_slns.append([obj_1.copy()])
@@ -2784,12 +2859,7 @@ class ObjectiveFunction(object):
             dparams = dparams.sum(axis=1)
             dalpha = dalpha.sum(axis=0)
             return np.r_[dparams.sum(0), dalpha.ravel()]
-        return score
-
-        score_obs = np.concatenate((dparams, dalpha),
-                                   axis=2)
-
-        score = np.sum(score_obs, axis=(1, 2))
+
 
 
 
@@ -3054,8 +3124,8 @@ class ObjectiveFunction(object):
             dparams = dparams.sum(axis=1)
             dalpha = dalpha.sum(axis=0)
             return np.r_[dparams.sum(0), dalpha]
-            dparams2 = dparms.sum(axis=1)
-            dalpha1 = dalpha[:, None].sum(axis=1)
+            #dparams2 = dparms.sum(axis=1)
+            #dalpha1 = dalpha[:, None].sum(axis=1)
             return np.concatenate((dparams.sum(0), dalpha[:, None]), axis=1)
         else:
             dparams = dparams.sum(axis=1)
@@ -3123,11 +3193,11 @@ class ObjectiveFunction(object):
 
             if obs_specific is False:
                 return np.r_[dparams.sum(0), dalpha_lindley.sum(), dalpha.sum()]
-                return np.r_[dparams.sum(0) + dparams_lindley.sum(0), dalpha_lindley.sum(), dalpha.sum()]
+                #return np.r_[dparams.sum(0) + dparams_lindley.sum(0), dalpha_lindley.sum(), dalpha.sum()]
             else:
                 return np.concatenate((dparams, dalpha_lindley, dalpha), axis=1)
-                return np.concatenate((dparams + dparams_lindley, dalpha_lindley, dalpha), axis=1)
-                return np.r_[dparams.sum(0), dalpha, dparams_lindley.sum(0), dalpha_lindley]
+                #return np.concatenate((dparams + dparams_lindley, dalpha_lindley, dalpha), axis=1)
+                #return np.r_[dparams.sum(0), dalpha, dparams_lindley.sum(0), dalpha_lindley]
 
         else:
             return np.r_[dparams.sum(0), dalpha]
@@ -3723,8 +3793,8 @@ class ObjectiveFunction(object):
             _type_: _description_
         """
 
-        if gamma <= 0.25:  # min defined value for stable nb
-            gamma = 0.25
+        # if gamma <= 0.01:  # min defined value for stable nb
+        #     gamma = 0.01
 
         endog = y
         mu = lam
@@ -3777,8 +3847,8 @@ class ObjectiveFunction(object):
         Returns:
             _type_: _description_
         """
-        if gamma <= 0.25:
-            gamma = 0.25
+        # if gamma <= 0.25:
+        #     gamma = 0.25
 
         endog = y
         mu = lam
@@ -3807,8 +3877,8 @@ class ObjectiveFunction(object):
             array: The negative binomial PMF for the given parameters.
         """
 
-        if gamma <= 0.25:
-            gamma = 0.25
+        # if gamma <= 0.01:
+        #     gamma = 0.01
 
         endog = y
         mu = lam
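These hunks lift the hard floor on the dispersion parameter `gamma` rather than clamping it at 0.25. For context, here is a standalone sketch of the negative binomial log-PMF in the mean-dispersion form such clamps guard; the NB2 parameterization (Var(y) = mu + alpha*mu^2, as in statsmodels) is an assumption made to keep the example self-contained:

```python
import numpy as np
from scipy.special import gammaln

def nb2_logpmf(y, mu, alpha):
    """NB2 log-PMF; as alpha -> 0 it approaches the Poisson, which is why a
    floor on the dispersion can stabilize estimation near that boundary."""
    size = 1.0 / alpha               # inverse dispersion
    prob = size / (size + mu)        # success probability
    return (gammaln(y + size) - gammaln(size) - gammaln(y + 1)
            + size * np.log(prob) + y * np.log(1.0 - prob))

print(nb2_logpmf(np.array([0, 1, 5]), mu=2.0, alpha=0.5))
```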
@@ -3897,17 +3967,16 @@ class ObjectiveFunction(object):
 
 
 
-        if abs(b_gam) < 0.05:
-            penalty += 1/np.abs(b_gam)
+        #if abs(b_gam) < 0.01:
+        #    penalty += 1/np.abs(b_gam)
 
 
-        if b_gam < 0:
-            penalty += 100
+
 
-        if b_gam >= 8:
+        if b_gam >= 4.5:
             penalty += b_gam
-
-            b_gam = 7.9
+            b_gam = 4.61
+            #b_gam = 7.9
 
         # penalty += model_nature['dispersion_penalty'] - b_gam
         #penalty += 1/np.max((0.01, abs(b_gam)))
         # b_gam = model_nature['dispersion_penalty']
@@ -3952,7 +4021,7 @@ class ObjectiveFunction(object):
         #b_gam = -.3
         if penalty < 0:
             raise Exception
-
+
         return penalty, b_gam
 
 
@@ -3960,6 +4029,7 @@ class ObjectiveFunction(object):
 
         #print('this was 0')
         eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
+        eta = np.array(eta)
         #eta = np.float64(eta)
         #eta = np.dot(Xd, params_main) + offset[:, :, 0]
         #eta2 = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
@@ -3974,11 +4044,13 @@ class ObjectiveFunction(object):
 
             #eVd = np.exp(np.clip(eta, 0, EXP_UPPER_LIMIT))
             # eVd = self.my_lindley(np.exp(np.clip(eta, None, EXP_UPPER_LIMIT)), 1)  # todo grab param
+
+
         else:
             #eVd = self.my_lindley(np.exp(np.clip(eta, None, EXP_UPPER_LIMIT)), 1.29)
 
             try:
-                eVd = np.exp(np.clip(eta, 0, EXP_UPPER_LIMIT))
+                eVd = np.exp(np.clip(eta, None, EXP_UPPER_LIMIT))
                 #eta_clip = np.clip(np.array(eta), np.float64(-1000.0), EXP_UPPER_LIMIT)
                 # eVd = np.exp(eta_clip)
             except Exception as e:
@@ -4304,7 +4376,7 @@ class ObjectiveFunction(object):
         elif dispersion == 1:
 
             proba_r = self._nonlog_nbin(y, eVd, b_gam)
-
+            # print(1)
             #proba_d = self.dnegbimonli(y, eVd, b_gam)
             # print('check whether this actually works')
 
@@ -4387,8 +4459,8 @@ class ObjectiveFunction(object):
         if panels is None:
             panels = self.panels
 
-        if alpha < 0:
-            alpha = np.abs(alpha)
+        # if alpha < 0:
+        #     alpha = np.abs(alpha)
         sig, omeg = self.get_dispersion_paramaters(betas, dispersion)
 
 
@@ -4766,7 +4838,7 @@ class ObjectiveFunction(object):
 
     def _penalty_betas(self, betas, dispersion, penalty, penalty_ap=100.0):
         penalty_val = 0.05
-        penalty_val_max = 100
+        penalty_val_max = 130
 
         # print('change_later')
         if dispersion != 0:
@@ -4867,8 +4939,18 @@ class ObjectiveFunction(object):
             stuff = tuple(new_stuff)
 
         return stuff
+
+
+
+
+    def _loglik_gradient2(self, betas, stuff, *args, **kwargs):
+
+        return self._loglik_gradient(self, betas, *stuff)
 
 
+
+
     def _loglik_gradient(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False, return_gradient_n=False, dispersion=0, test_set=0, return_EV=False, verbose=0, corr_list=None, zi_list=None, exog_infl=None, draws_grouped=None, Xgroup=None, model_nature=None, kwarg=None, **kwargs):
         """Fixed and random parameters are handled separately to speed up the estimation and the results are concatenated.
         """
@@ -4894,6 +4976,7 @@ class ObjectiveFunction(object):
 
         penalty = self._penalty_betas(
             betas, dispersion, penalty, float(len(y)/10.0))
+        self.n_obs = len(y)  # feeds into gradient
         if draws is None and draws_grouped is None and ('draws_hetro' not in model_nature or model_nature.get('draws_hetro').shape[1] == 0):
 
             if type(Xd) == dict:
@@ -4916,7 +4999,7 @@ class ObjectiveFunction(object):
                 penalty += -lindley_disp
                 lindley_disp = 0
 
-            eVd = self.eXB_calc(Bf, Xd, offset, dispersion, lindley_disp)
+            eVd = self.eXB_calc(Bf, Xd, offset, main_disper, lindley_disp)
 
             if return_EV is True:
 
@@ -4928,16 +5011,38 @@ class ObjectiveFunction(object):
             #self.lam = eVd
 
             if self.is_dispersion(dispersion):
-                penalty, betas[-1] = self._penalty_dispersion(dispersion, betas[-1], eVd, y, penalty, model_nature)
+                penalty, main_disper = self._penalty_dispersion(dispersion, main_disper, eVd, y, penalty, model_nature)
 
-
+                betas[-1] = main_disper
             llf_main = self.loglik_obs(
                 y, eVd, dispersion, main_disper, lindley_disp, betas)
 
 
             #llf_main = np.clip(llf_main, log_lik_min, log_lik_max)
-
+
             loglik = llf_main.sum()
+            if 'exog_infl' in model_nature:
+                params_infl = betas[Kf:Kf+len(model_nature.get('exog_infl'))]
+                params_main = Bf
+                #ones = np.ones((model_nature.get('exog_inflX').shape[0], model_nature.get('exog_inflX').shape[1], 1))
+                #exog_infl = np.concatenate((ones, model_nature.get('exog_inflX')), axis=2)
+                exog_infl = model_nature.get('exog_inflX')
+                llf_main = llf_main  # TODO: test this
+                w = self.predict_logit_part(params_infl, exog_infl)
+
+                w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
+
+
+                zero_idx = np.nonzero(y == 0)[0]
+                nonzero_idx = np.nonzero(y)[0]  # FIXME: should shape be unravelled?
+
+                llf = np.zeros_like(y, dtype=np.float64).reshape(-1, 1)  # TODO: test this (ravel added)
+                llf[zero_idx] = np.log(w[zero_idx] + (1 - w[zero_idx]) * np.exp(llf_main[zero_idx]))
+                llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
+                loglik = llf.sum()
+
 
             loglik = np.clip(loglik, log_lik_min, log_lik_max)
             if not np.isreal(loglik):
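The added `exog_infl` branch is the standard zero-inflated mixture: with inflation probability `w` from a logit part, a zero can come from either the inflation process or the count process, while a positive count must come from the count process. A minimal self-contained sketch of that likelihood, with a Poisson standing in for the package's `loglik_obs` (an assumption for runnability; all names here are illustrative):

```python
import numpy as np
from scipy.special import gammaln

def zi_loglik(y, mu, w):
    # Clip w away from exactly 0 or 1, as the diff does
    w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
    # Count-model log-likelihood; Poisson assumed for this sketch
    llf_main = y * np.log(mu) - mu - gammaln(y + 1)
    llf = np.where(y == 0,
                   np.log(w + (1 - w) * np.exp(llf_main)),   # zeros: inflated or sampling
                   np.log(1 - w) + llf_main)                 # positives: count process only
    return llf.sum()

y = np.array([0, 0, 1, 3, 0])
print(zi_loglik(y, mu=np.full(5, 1.2), w=np.full(5, 0.3)))
```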
@@ -4966,7 +5071,7 @@ class ObjectiveFunction(object):
         else:
             return -loglik + penalty
         # Else, we have draws
-
+        self.n_obs = len(y) * self.Ndraws
         penalty = self._penalty_betas(
             betas, dispersion, penalty, float(len(y)/10.0))
 
@@ -5203,9 +5308,28 @@ class ObjectiveFunction(object):
             #lik = np.nan_to_num(lik, )
             loglik = np.log(lik)
             llf_main = loglik
-
+            if 'exog_infl' in model_nature:
+                params_infl = betas[Kf:Kf+len(model_nature.get('exog_infl'))]
+                params_main = Bf
+                exog_infl = model_nature.get('exog_inflX')
+                llf_main = llf_main.ravel()  # TODO: test this
+                w = self.predict_logit_part(params_infl, exog_infl)
+
+                w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
+
+
+                zero_idx = np.nonzero(y == 0)[0]
+                nonzero_idx = np.nonzero(y)[0]  # FIXME: should shape be unravelled?
+
+                llf = np.zeros_like(y, dtype=np.float64).reshape(-1, 1)  # TODO: test this (ravel added)
+                llf[zero_idx] = np.log(w[zero_idx] + (1 - w[zero_idx]) * np.exp(llf_main[zero_idx]))
+                llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
+                loglik = llf.sum()
+            else:
 
-            loglik = loglik.sum()
+                loglik = loglik.sum()
 
             loglik = np.clip(loglik, log_lik_min, log_lik_max)
 
@@ -5454,7 +5578,40 @@ class ObjectiveFunction(object):
         return {'success': convergence, 'x': x, 'fun': res, 'message': message,
                 'hess_inv': Hinv, 'grad_n': grad_n, 'grad': g, 'nit': nit, 'nfev': nfev, 'njev': njev}
 
-    def _minimize(self, loglik_fn, x, args, method, tol, options, bounds=None):
+
+    def numerical_hessian(self, f, x0, eps=1.e-7):
+        """
+        Calculate a numerical approximation to the Hessian.
+
+        Parameters:
+        f : function
+            The function for which the Hessian should be calculated.
+        x0 : ndarray
+            The point at which the Hessian should be calculated.
+        eps : float
+            The small change in x used to calculate the numerical derivative.
+
+        Returns:
+        H : ndarray
+            Numerical approximation to the Hessian.
+        """
+        n = len(x0)
+        H = np.zeros((n, n))
+        f1 = approx_fprime(x0, f, eps)
+
+        # Iterate over columns
+        for j in range(n):
+            x1 = np.copy(x0)
+            x1[j] += eps
+            f2 = approx_fprime(x1, f, eps)
+            H[:, j] = (f2 - f1)/eps
+
+        return H
+
+
+
+
+    def _minimize(self, loglik_fn, x, args, method, tol, options, bounds=None, hess_calc=None):
 
         if method == "BFGS":
             #return minimize(loglik_fn, x, args=args, jac=args[6], hess=True, method='BFGS', tol=tol, options=options)
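A quick standalone check of this forward-difference construction: each column of `H` is a finite difference of the gradient, so on a quadratic it should recover the exact curvature, and the inverse-Hessian diagonal then yields standard errors. This sketch uses scipy's `approx_fprime` in place of the statsmodels import that `solution.py` relies on:

```python
import numpy as np
from scipy.optimize import approx_fprime

def numerical_hessian(f, x0, eps=1e-7):
    n = len(x0)
    H = np.zeros((n, n))
    f1 = approx_fprime(x0, f, eps)
    for j in range(n):                       # difference the gradient column by column
        x1 = np.copy(x0)
        x1[j] += eps
        H[:, j] = (approx_fprime(x1, f, eps) - f1) / eps
    return H

nll = lambda b: b[0]**2 + 4 * b[1]**2        # toy objective with Hessian diag(2, 8)
H = numerical_hessian(nll, np.zeros(2))
se = np.sqrt(np.diag(np.linalg.pinv(H)))     # mirrors the standard-error line in the next hunk
print(np.round(H, 2), se)
```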
@@ -5474,10 +5631,28 @@ class ObjectiveFunction(object):
 
         elif method == 'dogleg' or method == 'trust-exact':
             return minimize(loglik_fn, x, args=args, tol=tol, jac=True, hess='3-point', method='trust-constr', options=options)
-        elif method == 'Nelder-Mead':
-            return minimize(loglik_fn, x, args=args, method=method, options=options)
+        elif method == 'Nelder-Mead-BFGS':
+            argbs = list(args)
+
+            argbs[6] = False
+            argbs[7] = False
+            argbs = tuple(argbs)
+            result = minimize(loglik_fn, x, args=argbs, method='nelder-mead', options=options)
+
+            # Calculate numerical Hessian
+            if hess_calc is not None:
+                x = result.x
+                H = self.numerical_hessian(lambda x: self._loglik_gradient(x, *argbs), result.x, eps=1e-7*self.n_obs)
+                result['Hessian'] = H
+                result['hess_inv'] = np.linalg.pinv(H)
+                print('to do, only if hessian is fhfhfhf')
+                standard_errors = np.sqrt(np.diag(np.linalg.pinv(H)))
+                return result
+                #return minimize(loglik_fn, x, args=args, jac=args[6], hess=args[7], method='BFGS', options= {'gtol':1e-7*self.N}*self.Ndraws)
+            else:
+                return result
         elif method == 'BFGS_2':
-            return minimize(loglik_fn, x, args=args, jac=args[6], hess=args[7], method='BFGS', tol=tol, options=options)
+            return minimize(loglik_fn, x, args=args, jac=args[6], hess=args[7], method='BFGS')
         elif method == "L-BFGS-B":
 
             return minimize(loglik_fn, x, args=args, jac=args[6], hess=args[7], method='L-BFGS-B', bounds=bounds, tol=tol, options=options)
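The new branch is a two-stage estimator: derivative-free Nelder-Mead finds the point estimate, then the numerical Hessian at the optimum supplies a covariance for inference. The same idea in miniature on a toy Poisson likelihood (the data and function names here are illustrative, not from the package):

```python
import numpy as np
from scipy.optimize import minimize, approx_fprime

rng = np.random.default_rng(0)
y = rng.poisson(2.0, size=500)

def nll(b):                                  # Poisson NLL with a single log-mean parameter
    return -(y * b[0] - np.exp(b[0])).sum()

res = minimize(nll, np.zeros(1), method='nelder-mead')  # stage 1: derivative-free search
eps = 1e-5                                              # stage 2: Hessian at the optimum
g0 = approx_fprime(res.x, nll, eps)
g1 = approx_fprime(res.x + eps, nll, eps)
H = np.atleast_2d((g1 - g0) / eps)
se = np.sqrt(np.diag(np.linalg.pinv(H)))
print(res.x, se)                             # approximately log(2) and its standard error
```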
@@ -5658,9 +5833,11 @@ class ObjectiveFunction(object):
         for i in coeff_:  # pvalue penalty should handle this
             if abs(i) > 120:
                 penalty += abs(i)
-
-        covariance = self._robust_covariance(optim_res['hess_inv'], optim_res['grad_n']) \
-            if robust else optim_res['hess_inv']
+        if 'hess_inv' in optim_res:
+            covariance = self._robust_covariance(optim_res['hess_inv'], optim_res['grad_n']) \
+                if robust else optim_res['hess_inv']
+        else:
+            covariance = np.diag(np.ones(len(optim_res.x)))
         covariance = np.clip(covariance, 0, None)
         stderr = np.sqrt(np.diag(covariance))
         #stderr = [if np.abs(optim_res['x'][i]) > .1 else min(np.abs(optim_res['x'][i]/1.5), stderr[i]) for i in range(len(optim_res['x']))]
@@ -5679,7 +5856,7 @@ class ObjectiveFunction(object):
 
         # if post_cor_pams - post_cor_pams > 1:  # if it's only one then we don't technically have any correlations
         # this calculation takes into account the correlated rpms' distinct values
-        for i in range(0, post_cor_pams):
+        for i in range(pre_cor_pams, post_cor_pams):
 
             stderr[i] = stderr[i]/np.sqrt(sample_size)
 
@@ -5741,6 +5918,7 @@ class ObjectiveFunction(object):
 
     def fitRegression(self, mod,
                       dispersion=0, maxiter=2000, batch_size=None, num_hess=False):
+
         """
         Fits a Poisson regression given data and outcomes if dispersion is not declared;
         if declared, fits a NB (dispersion = 1) regression or GP (dispersion = 2)
@@ -5752,7 +5930,9 @@ class ObjectiveFunction(object):
         """
         # Set default method
         sub_zi = None
-        exog_infl = None
+        exog_infl = None if 'exog_infl' not in mod else mod['exog_infl']
+        inf_betas = 0 if exog_infl is None else len(exog_infl)
+
 
         sol = Solution()
         log_ll = 10 ** 9
@@ -5816,7 +5996,7 @@ class ObjectiveFunction(object):
 
 
         bb = np.random.normal(
-            0.1, 0.05, size=k + kr + kg + kh + dispersion_param_num)
+            0, 0.01, size=k + kr + kg + kh + dispersion_param_num + inf_betas)
         #bb = np.zeros(k + kr + kg + kh + dispersion_param_num)
 
 
@@ -5896,7 +6076,7 @@ class ObjectiveFunction(object):
                 bounds = []
                 for i in bb[:-1]:
                     bounds = bounds + [(i-30, i+30)]
-                bounds = bounds + [(0.25, 10)]
+                bounds = bounds + [(-1, 5)]
 
             elif dispersion == 2:
                 bounds = []
@@ -5907,14 +6087,55 @@ class ObjectiveFunction(object):
             else:
                 bounds = None
         else:
+            bb[0] = self.constant_value
+            if dispersion == 1:
+                bb[-1] = self.negative_binomial_value
             bounds = None
 
-        hess_est = False if method2 == 'L-BFGS-B' else True
-        initial_beta = self._minimize(self._loglik_gradient, bb,
-                                      args=(XX, y, None, None, None, None, calc_gradient, hess_est,
-                                            dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod),
-                                      method=method2, tol=1e-5, options={'gtol': tol['gtol']}, bounds=bounds)
 
+        # import numpy as np
+
+        comment_out = 0
+        if comment_out:
+            import rpy2.rinterface as rinterface
+            import rpy2.robjects as robjects
+            from rpy2.robjects import numpy2ri
+            import rpy2.robjects as ro
+            from rpy2.robjects import pandas2ri
+            r = robjects.r
+            numpy2ri.activate()
+            r['source']('testMAX.R')
+            rMAX = robjects.globalenv['LLFUN']
+            hess_est = False
+            args = (XX, y, None, None, None, None, False, hess_est,
+                    dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod)
+            #betas = 1
+
+
+            # Store the reference to the function as an instance variable
+            self.loglike = lambda p: self._loglik_gradient(p, *args)
+
+            # Use the instance variable when calling the R function
+            rMAX(self.loglike, start=bb)
+            #loglik = ro.conversion._py2rpy(loglik)
+            #rMAX(loglike, start=bb)
+
+            # Print the result.
+            #print(base.summary(result))
+
+
+
+        hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2'] else True
+
+        #initial_beta = minimize(self._loglik_gradient, bb, args=(XX, y, None, None, None, None, calc_gradient, hess_est, dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod), method='nelder-mead', options={'gtol': 1e-7*len(XX)})
+        hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
+        initial_beta = self._minimize(self._loglik_gradient, bb,
+                                      args=(XX, y, None, None, None, None, calc_gradient, hess_est,
+                                            dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod),
+                                      method=method2, tol=1e-5, options={'gtol': tol['gtol']}, bounds=bounds)
+        #a = minimize_parallel(fun=self._loglik_gradient, x0=bb, args=(XX, y, None, None, None, None, calc_gradient, hess_est,
+        #                      dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod))
+
         if method2 == 'L-BFGS-B':
             if hasattr(initial_beta.hess_inv, 'todense'):
                 initial_beta['hess_inv'] = initial_beta.hess_inv.todense() if hasattr(initial_beta.hess_inv, 'todense') else np.array([initial_beta.hess_inv(np.eye(len(bb))[i]) for i in range(len(bb))])
@@ -6019,9 +6240,9 @@ class ObjectiveFunction(object):
 
         while len(b) < self.get_param_num(dispersion):
             if dispersion == 0:
-                b = np.append(b, np.random.uniform(0.5, 1))
+                b = np.append(b, np.random.uniform(0.05, 0.1))
             else:
-                b = np.insert(b, -1, np.random.uniform(0.5, 1))
+                b = np.insert(b, -1, np.random.uniform(0.05, 0.1))
         if dispersion == 1:
             b[-1] = np.abs(b[-1])
             if b[-1] > 10:
@@ -6186,11 +6407,11 @@ class ObjectiveFunction(object):
             kgh = len(mod.get('hetro_hold'))
             draws_hetro = self.prepare_halton(kgh, nh, self.Ndraws, styd, slice_this_way=self.group_halton)
             mod['draws_hetro'] = draws_hetro.copy()
-
-            XHtest = mod.get('XH_test')
-            nht, pht, ______ = XHtest.shape
-            draws_hetro_test = self.prepare_halton(kgh, nht, self.Ndraws, styd, slice_this_way=self.group_halton_test)
-            mod['draws_hetro_test'] = draws_hetro_test.copy()
+            if self.is_multi:
+                XHtest = mod.get('XH_test')
+                nht, pht, ______ = XHtest.shape
+                draws_hetro_test = self.prepare_halton(kgh, nht, self.Ndraws, styd, slice_this_way=self.group_halton_test)
+                mod['draws_hetro_test'] = draws_hetro_test.copy()
 
         else:
             draws_hetro = None
6218
6439
  mod['dispersion_penalty'] = np.abs(b[-1])
6219
6440
  grad_args = (X, y, draws, X, Xr, self.batch_size,False, False, dispersion, 0, False, 0, self.rdm_cor_fit, self.zi_fit, exog_infl, draws_grouped, XG, mod)
6220
6441
  #self.gradients_est_yes = (1, 1)
6221
- if len(b) ==2:
6222
- print(1)
6442
+
6443
+
6223
6444
 
6224
6445
  if draws is None and draws_hetro is not None:
6225
6446
  print('hold')
6226
6447
  betas_est = self._minimize(self._loglik_gradient, b, args=(X, y, draws, X, Xr, self.batch_size,self.grad_yes, self.hess_yes, dispersion, 0, False, 0, self.rdm_cor_fit, self.zi_fit, exog_infl, draws_grouped, XG, mod),
6227
6448
  method=method2, tol=tol['ftol'],
6228
- options={'gtol': tol['gtol']}, bounds = bounds)
6449
+ options={'gtol': tol['gtol']}, bounds = bounds, hess_calc = True if method2 == 'Nelder-Mead-BFGS' else False)
6229
6450
 
6230
6451
 
6231
6452
  #self.numerical_hessian_calc = True
@@ -6434,8 +6655,8 @@ class ObjectiveFunction(object):
6434
6655
  self.rdm_cor_fit = [x for x, y in zip(
6435
6656
  select_data, model_nature.get('alpha_cor_rdm')) if y == 1]
6436
6657
 
6437
-
6438
- # [x for x, y in zip(select_data, model_nature.get('hurdle_terms')) if y == 1]
6658
+ #if self.zi_force:
6659
+ #self.zi_fit = [x for x, y in zip(select_data, model_nature.get('exog_infl')) if y == 1]
6439
6660
  #if alpha_grouped is not None:
6440
6661
  self.grouped_rpm = [x for x, y in zip(select_data, model_nature.get('alpha_grouped')) if y == 1]
6441
6662
  self.hetro_fit = [x for x, y in zip(select_data, model_nature.get('alpha_hetro')) if y == 1]
@@ -6620,6 +6841,11 @@ class ObjectiveFunction(object):
6620
6841
  #indices7 = layout[:]
6621
6842
  indices = self.get_named_indices(self.fixed_fit)
6622
6843
  indices5 = self.get_named_indices(self.hetro_fit)
6844
+
6845
+ if self.zi_force:
6846
+ indices6 = self.get_named_indices(self.zi_force_names)
6847
+ model_nature['exog_inflX'] = df_tf[:, :, indices6]
6848
+
6623
6849
  x_h_storage = []
6624
6850
  x_h_storage_test = []
6625
6851
  transform_hetro = []
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: metacountregressor
-Version: 0.1.48
+Version: 0.1.50
 Summary: A python package for count regression of rare events assisted by metaheuristics
 Author: zahern
 Author-email: zeke.ahern@hdr.qut.edu.au
@@ -18,7 +18,13 @@ Requires-Dist: statsmodels (>=0.14.0,<0.15.0)
 Requires-Dist: tabulate (>=0.9.0,<0.10.0)
 Description-Content-Type: text/markdown
 
-##### The Below code demonstrates how to set up automatic optimization assisted by the harmony search algorithm. References to the Differential Evolution and Simulated Annealing has been mentioned (change accordingly)
+<div style="display: flex; align-items: center;">
+    <img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width: 200px; margin-right: 20px;">
+    <p><span style="font-size: 60px;"><strong>MetaCountRegressor</strong></span></p>
+</div>
+
+##### Quick Setup
+The code below demonstrates how to set up automatic optimization assisted by the harmony search algorithm. References to Differential Evolution and Simulated Annealing are included (change accordingly).
 
 ## Quick install: Requires Python 3.10
 
@@ -37,7 +43,8 @@ from metacountregressor.metaheuristics import (harmony_search,
                                                simulated_annealing)
 ```
 
-#### Basic setup. Read in data, and select optimization algorithm. New solutiosn will be evaluated overtime. At the end of the runtime, the best solution will be reported
+#### Basic setup
+The initial setup involves reading in the data and selecting an optimization algorithm. As the runtime progresses, new solutions are continually evaluated, and at the end of the runtime the best solution is identified and printed out. In the case of multiple objectives, all of the best solutions that belong to the Pareto frontier are printed out.
 
 
 ```python
@@ -50,8 +57,9 @@ X['Offset'] = np.log(df['AADT']) # Explicitley define how to offset the data, no
 # Drop Y, selected offset term and ID as there are no panels
 X = df.drop(columns=['FREQ', 'ID', 'AADT'])
 
-#some example argument, these are defualt so the following line is just for claritity
-arguments = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6, 'instance_number':1, '_mpai':1, 'val_percentage':0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 6}
+#some example arguments; these are the defaults, so the following line is just for clarity (see the arguments section later for details)
+arguments = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6, 'instance_number': 1,
+             'val_percentage': 0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 6}
 # Fit the model with metacountregressor
 obj_fun = ObjectiveFunction(X, y, **arguments)
 #replace with other metaheuristics if desired
@@ -60,37 +68,37 @@ results = harmony_search(obj_fun)
 
 ```
 
-## Arguments to feed into the Objective Function
-### Reduce the lisst down into single elements to control the optimization routine
+## Arguments to feed into the Objective Function:
+Note: please consider these the main arguments to change.
+
+- `algorithm`: This parameter has multiple choices for the algorithm, such as 'hs', 'sa', and 'de'. Only one choice should be defined, as a string value.
+- `test_percentage`: This parameter represents the percentage of data used for in-sample prediction of the model. The value 0.15 corresponds to 15% of the data.
+- `val_percentage`: This parameter represents the percentage of data used to validate the model. The value 0.15 corresponds to 15% of the data.
+- `test_complexity`: This parameter defines the complexity level for testing. The value 6 tests all complexities. Alternatively, you can provide a list of numbers to consider different complexities. The complexities are further explained later in this document.
+- `instance_number`: This parameter is used to give a name to the outputs.
+- `obj_1`: This parameter has multiple choices for objective 1, such as 'bic', 'aic', and 'hqic'. Only one choice should be defined, as a string value.
+- `_obj_2`: This parameter has multiple choices for objective 2, such as 'RMSE_TEST', 'MSE_TEST', and 'MAE_TEST'.
+- `MAX_TIME`: This parameter specifies the maximum number of seconds for the total estimation before stopping.
+- `distribution`: This parameter is a list of distributions to consider. Include every option you want considered when modelling with random parameters. The valid options are 'Normal', 'LnNormal', 'Triangular', and 'Uniform'.
+- `transformations`: This parameter is a list of transformations to consider. Include every option you want considered; see the `transformations` argument description later in this document for the valid options.
 
 
-```python
-arguments = {
-    'algorithm': ['hs', 'sa', 'de'], # Multiple choices for algorithm
-    'test_percentage': 0.15, #data used to in sample predict the model, where 1 represents 100% of the data
-    'val_percentage': 0.15, #data used to validate the model, where 1 represents 100% of the data
-    'test_complexity': 6,
-    'instance_number': 1, #Used for giving a name to your outputs
-    '_mpai': 1,
-    'obj_1': ['bic', 'aic', 'hqic'], # Multiple choices for obj_1
-    '_obj_2': ['RMSE_TEST', 'MSE_TEST', 'MAE_TEST'], # Multiple choices for objecttive 2
-    'MAX_TIME': 10 # Number of seconds for total estimation before
-}
-```
 
-### Change the arguments.
-#### Reduce down the list sizes where necsessary
+### An example of changing the arguments
+Modify the arguments according to your preferences using the commented code as a guide.
 
 
 ```python
 #Solution Arguments
 arguments = {
-    'algorithm': 'hs',
-    'test_percentage': 0.2,
-    'test_complexity': 6, #or list based [0, 1, 2, 6]
-    'instance_number': 'name',
+    'algorithm': 'hs', #alternatively input 'de', or 'sa'
     'is_multi': 1,
-    'distribution': ['Normal', 'LnNormal', 'Triangular', 'Unifrom'],
+    'test_percentage': 0.2, # used in multi-objective optimisation only; saves 20% of data for testing
+    'val_percentage': 0.2, # saves 20% of data for validation
+    'test_complexity': 6, # complexity level for testing (6 tests all) or a list to consider potential differences in complexity
+    'instance_number': 'name', # used for creating a named folder where your models are saved
+    'distribution': ['Normal', 'LnNormal', 'Triangular', 'Uniform'],
     'Model': [0,1], # or equivalently ['POS', 'NB']
     'transformations': ['no', 'sqrt', 'archsinh'],
     '_max_time': 10
@@ -100,6 +108,7 @@ results = harmony_search(obj_fun)
 ```
 
 ## Initial Solution Configuration
+Listed below is an example of how to specify an initial solution within the framework. This initial solution will be used to calculate the fitness and will be considered in the objective-based search. However, as the search progresses, different hypotheses may be proposed, and alternative modeling components may completely replace the initial solution.
 
 
 ```python
@@ -113,7 +122,6 @@ manual_fit_spec = {
     'transformations': ['no', 'no', 'log', 'no', 'no', 'no', 'no'],
     'dispersion': 1
 }
-
 #Search Arguments
 arguments = {
     'algorithm': 'hs',
@@ -125,15 +133,15 @@ arguments = {
 obj_fun = ObjectiveFunction(X, y, **arguments)
 ```
 
-### simarly to return the results feed the objective function into a metaheuristic solution algorithm. An example of this is provided below:
-results = harmony_search(obj_fun)
+Similarly, to return the results, feed the objective function into a metaheuristic solution algorithm. An example of this is provided below:
 
 
 ```python
-
+results = harmony_search(obj_fun)
+print(results)
 ```
 
-## Notes
+## Notes:
 ### Capabilities of the software include:
 * Handling of Panel Data
 * Support for Data Transformations
@@ -151,13 +159,21 @@ Capability to handle heterogeneity in the means of the random parameters
 * Customization of Hyper-parameters to solve problems tailored to your dataset
 * Out-of-the-box optimization capability using default metaheuristics
 
+### Interpreting the output of the model:
+A regression table is produced. The following text elements are explained:
+- Std. Dev.: This column appears for effects that relate to random parameters and displays the assumed distribution next to it.
+- Chol: This term refers to a Cholesky decomposition element, showing the correlation between two random parameters. The combination of a Cholesky element with itself is equivalent to a normal random parameter.
+- hetro group #: This term represents the heterogeneity group number, which ties together all of the contributing factors that share heterogeneity in the means under the same numbered value.
+- $\tau$: This column displays the type of transformation that was applied to the specific contributing factor in the data.
+
+
 ## Arguments:
 #### In reference to the arguments that can be fed into the solution algorithm, a dictionary system is utilised with relevant names; these include:
 
 
 The following list describes the arguments available in this function. By default, all of the capabilities described are enabled unless specified otherwise as an argument. For list arguments, include all desired elements in the list to ensure the corresponding options are considered. Example code will be provided later in this guide.
 
-1. **`complexity_level`**: This argument accepts an integer between 0 to 5 or a list of such integers. Each integer represents a hierarchy level for estimable models associated with each explanatory variable. Here is a summary of the hierarchy:
+1. **`complexity_level`**: This argument accepts an integer between 0 and 5, or a list of such integers (for example, [0, 2, 3]). Each integer represents a hierarchy level for estimable models associated with each explanatory variable. Here is a summary of the hierarchy:
     - 0: Null model
     - 1: Simple fixed effects model
     - 2: Random parameters model
@@ -165,6 +181,10 @@ The following list describes the arguments available in this function. By defaul
     - 4: Grouped random parameters model
     - 5: Heterogeneity in the means random parameter model
 
+    **Note:** For the grouped random parameters model, groupings need to be defined prior to estimation. This can be achieved by including the following key-value pair in the arguments of the `ObjectiveFunction`: `'group': "Enter Column Grouping in data"`. Replace `"Enter Column Grouping in data"` with the actual grouping column in your dataset.
+
+    Similarly, for panel data, the panel column needs to be defined using the key-value pair: `'panel': "enter column string covering panels"`. Replace `"enter column string covering panels"` with the column that represents the panel information in your dataset. A short sketch follows this hunk.
+
 2. **`distributions`**: This argument accepts a list of strings where each string corresponds to a distribution. Valid options include:
     - "Normal"
     - "Lindley"
@@ -185,13 +205,15 @@ The following list describes the arguments available in this function. By defaul
     - "square-root"
     - "logarithmic"
     - "archsinh"
-    - "as factor"
+    - "as_factor"
 
 5. **`is_multi`**: This argument accepts an integer indicating whether single or multiple objectives are to be tested (0 for single, 1 for multiple).
 
-6. **`testing_split`**: This argument is used for multi-objective optimization. Define it as a decimal; for example, 0.2 represents 20% of the data for testing.
+6. **`test_percentage`**: This argument is used for multi-objective optimization. Define it as a decimal; for example, 0.2 represents 20% of the data for testing.
+
+7. **`val_percentage`**: This argument saves data for validation. Define it as a decimal; for example, 0.2 represents 20% of the data for validation.
 
-7. **`_max_time`**: This argument is used to add a termination time in the algorithm. It takes values as seconds. Note the time is only dependenant on the time after intial population of solutions are generated.
+8. **`_max_time`**: This argument adds a termination time to the algorithm, in seconds. Note that the clock only starts after the initial population of solutions has been generated.
 
 ## Contact
 If you have any questions, ideas to improve MetaCountRegressor, or want to report a bug, just open a new issue in the [GitHub repository](https://github.com/zahern/CountDataEstimation).
@@ -1,8 +1,9 @@
 metacountregressor/1848.csv,sha256=EMXrgQsLrOwEIYMBIV0XLAnXPpzwiQRWoxg1ueL_k7U,49365
 metacountregressor/1h_syntth_please.ipynb,sha256=RI7S3TUx-FafrGrYaPtoCOHjbtAtTDpfif6Ka2bRJbs,25000
 metacountregressor/4000.csv,sha256=lTCmjN50e05DoPFPnAR1M7o-ECt5Su55T20p2BZ2HzY,135986
-metacountregressor/__init__.py,sha256=Mt9_rm0pJVqyDYdo_qVIejZt5H6wdfzq_k8tHTIqxh4,208
-metacountregressor/_device_cust.py,sha256=Ek-dgesZ5zZ8UeJA4hh3s9Q7Bb2duDwIwfWEaAouDNg,2074
+metacountregressor/__init__.py,sha256=v7LfPyo5S2f0CR4tD0vX5zxm7HuHKFiKgxUnW5Oyk9M,2924
+metacountregressor/_device_cust.py,sha256=2KcXnDUrzwMGMdgC7XEzqwJbMeIF9emY2WzFvJnN2co,2113
+metacountregressor/alog.png,sha256=1G3vajvqzTphv6BxKeEV5t_axUq2zrT0iWy1ZblMYVQ,965927
 metacountregressor/artificial_1h_mixed_corr_2023_MOOF.csv,sha256=PbgmFs3xaPzjFD7aALDWzaYhEyLKKBC-w_9DfyHV-gc,433769
 metacountregressor/artificial_mixed_corr_2023_MOOF.csv,sha256=P-211ts-GR_uxlR_TbuuNwa1lTLcEJVMVAwTltFGDFc,7343652
 metacountregressor/artificial_ZA.csv,sha256=w8GBj586DzDfKOn8hwHDNkLeBz5si0qopht1JjAWaTU,3713176
@@ -38,7 +39,7 @@ metacountregressor/set_data.csv,sha256=68iJkW4O4HVM8GyNlO0drwp8ZMXkccXCUc7jnA8xn
 metacountregressor/set_data_s.csv,sha256=hELwnv6RjpmXcMheFafwrYbLbyYE21hFKyqJhA8L05o,11111
 metacountregressor/setup.py,sha256=CpbdBScFhvStc6WByFiAlP7T5wGdWetsLI8X5JRRpP4,268
 metacountregressor/single_objective_finder.py,sha256=QYXUpxJp7-ul5ZiIKGgYGaH_yFFGUbI7X3yKu5asogE,1960
-metacountregressor/solution.py,sha256=3QAkY4aq0vT0wWagbeivNtCzJl1MeDZLO4ohrNQe8DU,303421
+metacountregressor/solution.py,sha256=_psVXiW-yBi4WqdI3a3aD4tIA2m_jGnC3dmG121JtLo,313756
 metacountregressor/Stage5A_1848_All_Initial_Columns.csv,sha256=uwsadEyupgIH1w5f8vnlwlo13ryww3VCGYlOnN0dEL0,188769
 metacountregressor/Stage5A_1848_All_Initial_Columns.xlsx,sha256=5U5Ab1jjGi5qoKp06Bw2tpdPjGaDGoyt5976AAFdEbs,699231
 metacountregressor/synth_dataset_generator.ipynb,sha256=caBMQJOaeINPZJw5aTsSOXhmenSqrpS7GycINAzUUxs,27153
@@ -48,6 +49,6 @@ metacountregressor/testML.R,sha256=UbTsLFUhoJG9bJnU2rbUKlfcprAkROnhREK41qKzbvQ,2
 metacountregressor/TestSetFake.csv,sha256=JPYAWYLAw7rgQHdGTz0rltMfapX8QYt3BVSyK_D-Lzg,1640
 metacountregressor/ThaiAccident.csv,sha256=NIi_uPyo5u-B6Hj0Ln9xuJ8fnvGbWK9GLdTWdpG5uug,418202
 metacountregressor/tk_app.py,sha256=0UM76hpQ-ha96ma_Z5ryxYQUSdF4PJBCsLuI1EGu6_E,59490
-metacountregressor-0.1.48.dist-info/METADATA,sha256=d02v5aP2UstLeg28sc8zDBOzsfBt4G_ywe0rTQIeJK8,9071
-metacountregressor-0.1.48.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
-metacountregressor-0.1.48.dist-info/RECORD,,
+metacountregressor-0.1.50.dist-info/METADATA,sha256=1pSL5ALid4u59pWYEcREpyWEdE0YpCD_sTpqWhvjDZU,13112
+metacountregressor-0.1.50.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
+metacountregressor-0.1.50.dist-info/RECORD,,