metacountregressor 0.1.176__tar.gz → 0.1.203__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/PKG-INFO +2 -2
  2. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor/helperprocess.py +11 -1
  3. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor/solution.py +143 -29
  4. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor.egg-info/PKG-INFO +2 -2
  5. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor.egg-info/requires.txt +1 -1
  6. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/setup.py +1 -1
  7. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/LICENSE.txt +0 -0
  8. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/README.rst +0 -0
  9. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor/__init__.py +0 -0
  10. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor/_device_cust.py +0 -0
  11. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor/app_main.py +0 -0
  12. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor/data_split_helper.py +0 -0
  13. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor/halton.py +0 -0
  14. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor/main.py +0 -0
  15. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor/main_old.py +0 -0
  16. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor/metaheuristics.py +0 -0
  17. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor/pareto_file.py +0 -0
  18. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor/pareto_logger__plot.py +0 -0
  19. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor/setup.py +0 -0
  20. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor/single_objective_finder.py +0 -0
  21. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor/test_generated_paper2.py +0 -0
  22. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor.egg-info/SOURCES.txt +0 -0
  23. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor.egg-info/dependency_links.txt +0 -0
  24. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor.egg-info/not-zip-safe +0 -0
  25. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor.egg-info/top_level.txt +0 -0
  26. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/setup.cfg +0 -0
  27. {metacountregressor-0.1.176 → metacountregressor-0.1.203}/tests/test.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: metacountregressor
3
- Version: 0.1.176
3
+ Version: 0.1.203
4
4
  Summary: Extensive Testing for Estimation of Data Count Models
5
5
  Home-page: https://github.com/zahern/CountDataEstimation
6
6
  Author: Zeke Ahern
@@ -16,7 +16,7 @@ Requires-Dist: latextable
16
16
  Requires-Dist: pandas
17
17
  Requires-Dist: scikit_learn>=1.4.1.post1
18
18
  Requires-Dist: statsmodels
19
- Requires-Dist: psustil
19
+ Requires-Dist: psutil
20
20
  Dynamic: author
21
21
  Dynamic: author-email
22
22
  Dynamic: description
@@ -271,7 +271,17 @@ def guess_low_medium_high(column_name, series):
271
271
  # Compute the tertiles (33rd and 66th percentiles)
272
272
  #print('did it make it...')
273
273
  #mode_value = st.mode(series) # Get the most frequent value
274
- #print('good')
274
+ #i dont think this works cayse its not a seriers any other way
275
+ is_binary = series.isin([0, 1]).all()
276
+ if is_binary:
277
+ return {
278
+ 'type': 'binary',
279
+ 'bins': [0,1],
280
+ 'labels': ['Off', 'On'],
281
+ 'prefix': f'{column_name}'
282
+
283
+ }
284
+
275
285
  # series = pd.to_numeric(series, errors='coerce').fillna(mode_value)
276
286
  low_threshold = np.quantile(series, 0.33)
277
287
  high_threshold = np.quantile(series,0.66)
@@ -159,7 +159,7 @@ class ObjectiveFunction(object):
159
159
  self.full_model = None
160
160
  self.GP_parameter = 0
161
161
  self.is_multi = kwargs.get('is_multi', False)
162
- self.complexity_level = 6
162
+ self.complexity_level = kwargs.get('complexity_level', 6)
163
163
  self._max_iterations_improvement = 10000
164
164
  self.generated_sln = set()
165
165
  self.ave_mae = 0
@@ -256,10 +256,11 @@ class ObjectiveFunction(object):
256
256
  self.is_multi = False
257
257
 
258
258
  if 'panels' in kwargs and not (kwargs.get('panels') == None):
259
- self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)
259
+ if kwargs.get('group') is not None:
260
+ self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)
260
261
 
261
- x_data[kwargs['group']] = x_data[kwargs['group']].astype(
262
- 'category').cat.codes
262
+ x_data[kwargs['group']] = x_data[kwargs['group']].astype(
263
+ 'category').cat.codes
263
264
  self.complexity_level = 6
264
265
  # create test dataset
265
266
 
@@ -309,10 +310,13 @@ class ObjectiveFunction(object):
309
310
  df_train[kwargs['panels']]) if kwargs['panels'] is not None else None
310
311
  self.ids_test = np.asarray(
311
312
  df_test[kwargs['panels']]) if kwargs['panels'] is not None else None
312
- groupll = np.asarray(df_train[kwargs['group']].astype(
313
- 'category').cat.codes)
314
- group_test = np.asarray(df_test[kwargs['group']].astype(
315
- 'category').cat.codes)
313
+ if kwargs.get('group') is not None:
314
+ groupll = np.asarray(df_train[kwargs['group']].astype(
315
+ 'category').cat.codes)
316
+ group_test = np.asarray(df_test[kwargs['group']].astype(
317
+ 'category').cat.codes)
318
+ else:
319
+ groupll = None
316
320
  X, Y, panel, group = self._arrange_long_format(
317
321
  df_train, y_train, self.ids, self.ids, groupll)
318
322
  self.group_halton = group.copy()
@@ -501,7 +505,7 @@ class ObjectiveFunction(object):
501
505
  self._max_hurdle = 4
502
506
 
503
507
  #Manually fit from analyst specification
504
- manual_fit = kwargs.get('Manual_Fit')
508
+ manual_fit = kwargs.get('Manual_Fit', None)
505
509
  if manual_fit is not None:
506
510
  print('fitting manual')
507
511
  self.process_manual_fit(manual_fit)
@@ -538,7 +542,7 @@ class ObjectiveFunction(object):
538
542
  if self.is_multi:
539
543
  self._offsets_test = self._x_data_test[:, :, val_od]
540
544
  self._x_data_test = self.remove_offset(self._x_data_test, val_od)
541
- print(self._offsets)
545
+ #print(self._offsets)
542
546
  else:
543
547
  self.initialize_empty_offsets()
544
548
 
@@ -1712,6 +1716,11 @@ class ObjectiveFunction(object):
1712
1716
  vector[get_rdm_i] -= 1
1713
1717
  only_ints_vals[get_rdm_i] -= 1
1714
1718
 
1719
+ elif vector[get_rdm_i] == 1:
1720
+ vector[get_rdm_i] -= 1
1721
+ only_ints_vals[get_rdm_i] -= 1
1722
+
1723
+
1715
1724
  if vector.count(5) == 1:
1716
1725
  idx = vector.index(5)
1717
1726
  vector[idx] = 0
@@ -2361,7 +2370,7 @@ class ObjectiveFunction(object):
2361
2370
  sorted(my_dict, key=lambda x: x[0]['pval_percentage'])
2362
2371
 
2363
2372
  def get_fitness(self, vector, multi=False, verbose=False, max_routine=3):
2364
- obj_1 = 10.0 ** 5
2373
+ obj_1 = 10.0 ** 4
2365
2374
  obj_best = None
2366
2375
  sub_slns = list()
2367
2376
 
@@ -2369,12 +2378,14 @@ class ObjectiveFunction(object):
2369
2378
  vector) # just added to grab the fixed fit TODO: Clean up
2370
2379
  dispersion = model_nature.get('dispersion')
2371
2380
  self.define_selfs_fixed_rdm_cor(model_nature)
2381
+
2372
2382
  try:
2373
2383
  self.repair(vector)
2374
2384
  except Exception as e:
2375
- print('prolem repairing here')
2385
+ print('problem repairing here')
2376
2386
  print(vector)
2377
2387
  print(e)
2388
+
2378
2389
  layout = vector.copy()
2379
2390
  trial_run = 0
2380
2391
  max_trial = 0
@@ -2453,10 +2464,10 @@ class ObjectiveFunction(object):
2453
2464
 
2454
2465
 
2455
2466
  if not self.is_quanitifiable_num(obj_1[self._obj_1]):
2456
- obj_1[self._obj_1] = 10 ** 9
2467
+ obj_1[self._obj_1] = 10 ** 5
2457
2468
  else:
2458
2469
  if obj_1[self._obj_1] <= 0:
2459
- obj_1[self._obj_1] = 10 ** 9
2470
+ obj_1[self._obj_1] = 10 ** 5
2460
2471
 
2461
2472
  if multi:
2462
2473
 
@@ -2487,10 +2498,10 @@ class ObjectiveFunction(object):
2487
2498
 
2488
2499
  self.reset_sln()
2489
2500
  if not self.is_quanitifiable_num(obj_1[self._obj_1]):
2490
- obj_1[self._obj_1] = 10 ** 9
2501
+ obj_1[self._obj_1] = 10 ** 5
2491
2502
  else:
2492
2503
  if obj_1[self._obj_1] == 0:
2493
- obj_1[self._obj_1] = 10 ** 9
2504
+ obj_1[self._obj_1] = 10 **5
2494
2505
  if verbose:
2495
2506
  print('The best solution iteratively is of objective value:', obj_1)
2496
2507
 
@@ -3029,6 +3040,39 @@ class ObjectiveFunction(object):
3029
3040
  # print('log_lik poisson', log_lik)
3030
3041
  return -log_lik
3031
3042
 
3043
+ def extract_parameters(self, betas, Kf, Kr, Kchol_a, Krb_a):
3044
+ """
3045
+ Extracts parameters from the `betas` array based on the given sizes.
3046
+
3047
+ Parameters:
3048
+ betas (numpy.ndarray): The array of betas.
3049
+ Kf (int): Size of Bf (first Kf elements of betas).
3050
+ Kr (int): Size of Br.
3051
+ Kchol_a (int): Part of the size for brstd.
3052
+ Krb_a (int): Part of the size for brstd.
3053
+
3054
+ Returns:
3055
+ tuple: A tuple containing:
3056
+ - Bf (numpy.ndarray): The first Kf elements of betas.
3057
+ - Br (numpy.ndarray): The next Kr elements of betas after Bf.
3058
+ - brstd (numpy.ndarray): The next Kchol_a + Krb_a elements of betas after Br.
3059
+ - remaining_betas (numpy.ndarray): Any remaining elements in betas after brstd.
3060
+ """
3061
+ # Step 1: Extract Bf
3062
+ Bf = betas[:Kf] # First Kf elements
3063
+
3064
+ # Step 2: Extract Br
3065
+ Br = betas[Kf:Kf + Kr] # Next Kr elements after Bf
3066
+
3067
+ # Step 3: Extract brstd
3068
+ brstd_size = Kchol_a + Krb_a # Total size of brstd
3069
+ brstd = betas[Kf + Kr:Kf + Kr + brstd_size] # Next brstd_size elements after Br
3070
+
3071
+ # Step 4: Extract remaining betas
3072
+ remaining_betas = betas[Kf + Kr + brstd_size:] # Remaining elements in betas
3073
+
3074
+ return Bf, Br, brstd, remaining_betas
3075
+
3032
3076
  def convert_nbinom_params(self, mu, theta):
3033
3077
  """
3034
3078
  Convert mean/dispersion parameterization of a negative binomial to the ones scipy supports
@@ -3561,8 +3605,11 @@ class ObjectiveFunction(object):
3561
3605
  # Compute: betas = mean + sd*draws
3562
3606
  if len(br_sd) != draws.shape[1]:
3563
3607
  #get the same size as the mean
3564
- betas_random = self.Br.copy()
3565
-
3608
+ #if hasattr(self.Br):
3609
+ # betas_random = self.Br.copy()
3610
+ #else:
3611
+ idx = self.get_X_draw_tril()
3612
+ betas_random = br_mean[None, :, None] + draws[:,idx, :] * br_sd[None, :, None]
3566
3613
  '''
3567
3614
  c = self.get_num_params()[3:5]
3568
3615
 
@@ -4716,10 +4763,10 @@ class ObjectiveFunction(object):
4716
4763
  n_coeff = self.get_param_num(dispersion)
4717
4764
  Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh = self.get_num_params()
4718
4765
  if Kchol_a != Kchol:
4719
- print('hold')
4766
+ print('hold qhy')
4720
4767
 
4721
4768
  if Kr_b != Kr_b_a:
4722
- print('hold')
4769
+ print('hold qhy')
4723
4770
 
4724
4771
 
4725
4772
 
@@ -4735,13 +4782,32 @@ class ObjectiveFunction(object):
4735
4782
  Bf = betas[0:Kf] # Fixed betas
4736
4783
 
4737
4784
 
4738
-
4785
+ # Bf_new, Br_new, Br_std_new, Br_rema = self.extract_parameters(betas, Kf, Kr, Kchol_a, Kr_b_a)
4739
4786
 
4740
4787
 
4741
4788
  Vdf = dev.np.einsum('njk,k -> nj', Xdf, Bf, dtype=np.float64) # (N, P)
4742
4789
  br = betas[Kf:Kf + Kr]
4743
4790
 
4791
+
4792
+
4793
+ #i have an array of betas, Kf represents the first kf of the betas array
4794
+ # now return Bf where size of bf = kf
4795
+
4796
+ # size of br needs to be Kr
4797
+ #Kr
4798
+ #now extract from betas, after all the Bf
4799
+ # cakk
4800
+
4801
+ #the next array is brstd
4802
+
4803
+ # size of brstd needs to be
4804
+ # Kchol_a + Krb_a
4805
+ #its grabbing from the
4806
+
4807
+
4808
+
4744
4809
  brstd = betas[Kf + Kr:Kf + Kr + Kr_b + Kchol]
4810
+
4745
4811
  # initialises size matrix
4746
4812
  proba = [] # Temp batching storage
4747
4813
 
@@ -4755,6 +4821,8 @@ class ObjectiveFunction(object):
4755
4821
  if len(self.none_handler(self.rdm_cor_fit)) == 0:
4756
4822
  # Br = self._transform_rand_betas(br, np.abs(
4757
4823
  # brstd), draws_) # Get random coefficients, old method
4824
+ #TODO
4825
+
4758
4826
  Br = self._transform_rand_betas(br,
4759
4827
  brstd, draws_) # Get random coefficients
4760
4828
  self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
@@ -5844,6 +5912,11 @@ class ObjectiveFunction(object):
5844
5912
 
5845
5913
  if self.no_extra_param:
5846
5914
  dispersion_poisson = 0
5915
+ print('b :', len(b))
5916
+ print(self.get_param_num())
5917
+ baby = self.get_param_num()
5918
+ if len(b) != baby:
5919
+ print('modify')
5847
5920
  betas_est = self._minimize(self._loglik_gradient, b, args=(
5848
5921
  X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion_poisson, 0, False, 0,
5849
5922
  self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
@@ -6205,8 +6278,9 @@ class ObjectiveFunction(object):
6205
6278
  transform, distribution, None, dispersion=dispersion)
6206
6279
 
6207
6280
  def get_named_indices(self, names):
6208
- indices = [i for i, name in enumerate(self._characteristics_names) if name in names]
6209
-
6281
+ # Change substrings issue
6282
+ indices = [i for i, name in enumerate(self._characteristics_names) if name == names]
6283
+ indices = [i for i, name in enumerate(self._characteristics_names) if name in names and isinstance(name, str)]
6210
6284
  return indices
6211
6285
 
6212
6286
  """
@@ -6482,10 +6556,7 @@ class ObjectiveFunction(object):
6482
6556
  else:
6483
6557
  rv_indices.append(rv_count_all - 1)
6484
6558
 
6485
- # for s.d.: gr_w = (Obs prob. minus predicted probability) * obs. var * random draw
6486
- draws_tril_idx = np.array([corr_indices[j]
6487
- for i in range(len(self.none_handler(self.rdm_cor_fit)))
6488
- for j in range(i + 1)]) # varnames pos.
6559
+
6489
6560
  X_tril_idx = np.array([corr_indices[i]
6490
6561
  for i in range(len(self.none_handler(self.rdm_cor_fit)))
6491
6562
  for j in range(i + 1)])
@@ -6494,12 +6565,55 @@ class ObjectiveFunction(object):
6494
6565
  range_var = [x for x in
6495
6566
  range(len(self.none_handler(var_uncor)))]
6496
6567
  range_var = sorted(range_var)
6497
- draws_tril_idx = np.array(np.concatenate((range_var, draws_tril_idx)))
6568
+
6498
6569
  X_tril_idx = np.array(np.concatenate((range_var, X_tril_idx)))
6499
- draws_tril_idx = draws_tril_idx.astype(int)
6570
+
6500
6571
  X_tril_idx = X_tril_idx.astype(int)
6501
6572
  return X_tril_idx
6502
6573
 
6574
+ def get_X_draw_tril(self):
6575
+ '''For correlations find the repeating terms'''
6576
+ varnames = self.none_join([self.rdm_grouped_fit, self.rdm_fit, self.rdm_cor_fit])
6577
+ rv_count_all = 0
6578
+ chol_count = 0
6579
+ rv_count = 0
6580
+ corr_indices = []
6581
+ rv_indices = []
6582
+ for ii, var in enumerate(varnames): # TODO: BUGFIXf
6583
+ if var in self.none_handler(self.rdm_cor_fit):
6584
+ is_correlated = True
6585
+ else:
6586
+ is_correlated = False
6587
+
6588
+ rv_count_all += 1
6589
+ if is_correlated:
6590
+ chol_count += 1
6591
+ else:
6592
+ rv_count += 1
6593
+
6594
+ if var in self.none_handler(self.rdm_cor_fit):
6595
+
6596
+ corr_indices.append(rv_count_all - 1) # TODO: what does tis do
6597
+
6598
+ else:
6599
+ rv_indices.append(rv_count_all - 1)
6600
+
6601
+ # for s.d.: gr_w = (Obs prob. minus predicted probability) * obs. var * random draw
6602
+ draws_tril_idx = np.array([corr_indices[j]
6603
+ for i in range(len(self.none_handler(self.rdm_cor_fit)))
6604
+ for j in range(i + 1)]) # varnames pos.
6605
+
6606
+ # Find the s.d. for random variables that are not correlated
6607
+ var_uncor = self.none_join([self.rdm_grouped_fit, self.rdm_fit])
6608
+ range_var = [x for x in
6609
+ range(len(self.none_handler(var_uncor)))]
6610
+ range_var = sorted(range_var)
6611
+ draws_tril_idx = np.array(np.concatenate((range_var, draws_tril_idx)))
6612
+
6613
+ draws_tril_idx = draws_tril_idx.astype(int)
6614
+
6615
+ return draws_tril_idx
6616
+
6503
6617
 
6504
6618
 
6505
6619
  def modifyn(self, data):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: metacountregressor
3
- Version: 0.1.176
3
+ Version: 0.1.203
4
4
  Summary: Extensive Testing for Estimation of Data Count Models
5
5
  Home-page: https://github.com/zahern/CountDataEstimation
6
6
  Author: Zeke Ahern
@@ -16,7 +16,7 @@ Requires-Dist: latextable
16
16
  Requires-Dist: pandas
17
17
  Requires-Dist: scikit_learn>=1.4.1.post1
18
18
  Requires-Dist: statsmodels
19
- Requires-Dist: psustil
19
+ Requires-Dist: psutil
20
20
  Dynamic: author
21
21
  Dynamic: author-email
22
22
  Dynamic: description
@@ -5,4 +5,4 @@ latextable
5
5
  pandas
6
6
  scikit_learn>=1.4.1.post1
7
7
  statsmodels
8
- psustil
8
+ psutil
@@ -72,6 +72,6 @@ setuptools.setup(
72
72
  'pandas',
73
73
  'scikit_learn>=1.4.1.post1',
74
74
  'statsmodels',
75
- 'psustil'
75
+ 'psutil'
76
76
  ]
77
77
  )