metacountregressor 0.1.71__tar.gz → 0.1.86__tar.gz

Files changed (26)
  1. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/PKG-INFO +78 -20
  2. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/README.rst +97 -23
  3. metacountregressor-0.1.86/metacountregressor/data_split_helper.py +90 -0
  4. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/metacountregressor/helperprocess.py +115 -0
  5. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/metacountregressor/main.py +51 -72
  6. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/metacountregressor/metaheuristics.py +25 -24
  7. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/metacountregressor/solution.py +281 -694
  8. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/metacountregressor.egg-info/PKG-INFO +78 -20
  9. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/metacountregressor.egg-info/SOURCES.txt +1 -0
  10. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/metacountregressor.egg-info/requires.txt +1 -0
  11. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/setup.py +27 -4
  12. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/LICENSE.txt +0 -0
  13. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/metacountregressor/__init__.py +0 -0
  14. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/metacountregressor/_device_cust.py +0 -0
  15. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/metacountregressor/halton.py +0 -0
  16. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/metacountregressor/main_old.py +0 -0
  17. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/metacountregressor/pareto_file.py +0 -0
  18. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/metacountregressor/pareto_logger__plot.py +0 -0
  19. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/metacountregressor/setup.py +0 -0
  20. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/metacountregressor/single_objective_finder.py +0 -0
  21. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/metacountregressor/test_generated_paper2.py +0 -0
  22. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/metacountregressor.egg-info/dependency_links.txt +0 -0
  23. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/metacountregressor.egg-info/not-zip-safe +0 -0
  24. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/metacountregressor.egg-info/top_level.txt +0 -0
  25. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/setup.cfg +0 -0
  26. {metacountregressor-0.1.71 → metacountregressor-0.1.86}/tests/test.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: metacountregressor
-Version: 0.1.71
+Version: 0.1.86
 Summary: Extensions for a Python package for estimation of count models.
 Home-page: https://github.com/zahern/CountDataEstimation
 Author: Zeke Ahern
@@ -11,12 +11,18 @@ Description-Content-Type: text/markdown
 License-File: LICENSE.txt
 Requires-Dist: numpy>=1.13.1
 Requires-Dist: scipy>=1.0.0
+Requires-Dist: requests
 
 <div style="display: flex; align-items: center;">
-<img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width: 200px; margin-right: 20px;">
+<img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width: 100px; margin-right: 20px;">
 <p><span style="font-size: 60px;"><strong>MetaCountRegressor</strong></span></p>
 </div>
 
+# Tutorial also available as a Jupyter notebook
+[Download Example Notebook](https://github.com/zahern/CountDataEstimation/blob/main/Tutorial.ipynb)
+
+The tutorial provides more extensive examples of how to run the code and perform experiments. Further documentation is currently in development.
+
 ##### Quick Setup
 The code below demonstrates how to set up automatic optimization assisted by the harmony search algorithm. References to Differential Evolution and Simulated Annealing are also included (change accordingly).
 
@@ -35,8 +41,15 @@ from metacountregressor.solution import ObjectiveFunction
 from metacountregressor.metaheuristics import (harmony_search,
                                                differential_evolution,
                                                simulated_annealing)
+
+
 ```
 
+    loaded standard packages
+    loaded helper
+    testing
+
+
 
 #### Basic setup.
 The initial setup involves reading in the data and selecting an optimization algorithm. As the runtime progresses, new solutions will be continually evaluated. Finally, at the end of the runtime, the best solution will be identified and printed out. In the case of multiple objectives, all of the best solutions that belong to the Pareto frontier will be printed out.
@@ -53,7 +66,7 @@ X = df.drop(columns=['FREQ', 'ID', 'AADT'])
 
 #some example arguments; these are the defaults, so the following line is just for clarity. See the later arguments section for details.
 arguments = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6, 'instance_number':1,
-             'val_percentage':0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 6}
+             'val_percentage':0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "_max_time": 6}
 # Fit the model with metacountregressor
 obj_fun = ObjectiveFunction(X, y, **arguments)
 #replace with other metaheuristics if desired
@@ -71,7 +84,7 @@ Note: Please consider the main arguments to change.
 - `val_percentage`: This parameter represents the percentage of data used to validate the model. The value 0.15 corresponds to 15% of the data.
 - `test_complexity`: This parameter defines the complexity level for testing. The value 6 tests all complexities. Alternatively, you can provide a list of numbers to consider different complexities. The complexities are further explained later in this document.
 - `instance_number`: This parameter is used to give a name to the outputs.
-- `obj_1`: This parameter has multiple choices for obj_1, such as 'bic', 'aic', and 'hqic'. Only one choice should be defined as a string value.
+- `_obj_1`: This parameter has multiple choices for obj_1, such as 'bic', 'aic', and 'hqic'. Only one choice should be defined as a string value.
 - `_obj_2`: This parameter has multiple choices for objective 2, such as 'RMSE_TEST', 'MSE_TEST', and 'MAE_TEST'.
 - `_max_time`: This parameter specifies the maximum number of seconds for the total estimation before stopping.
 - `distribution`: This parameter is a list of distributions to consider. Select all of the options you want and put them into a list of valid options if you want to consider the distribution type when modelling with random parameters. The valid options include: 'Normal', 'LnNormal', 'Triangular', and 'Uniform'. (See the sketch after this hunk.)
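For illustration, here is a minimal sketch exercising the arguments documented above. The values are illustrative only, not recommendations; `X` and `y` are assumed to be defined as in the earlier setup code.

```python
# Hypothetical settings chosen purely to illustrate each documented argument.
arguments = {
    'algorithm': 'hs',                      # harmony search; 'de' and 'sa' are the documented alternatives
    'test_percentage': 0.2,                 # 20% of the data held out for testing
    'val_percentage': 0.1,                  # 10% held out for validation
    'test_complexity': [0, 1, 2],           # restrict the search to lower-complexity model forms
    'instance_number': 2,                   # names the output files
    '_obj_1': 'aic',                        # objective 1: a single string
    '_obj_2': 'MAE_TEST',                   # objective 2: an out-of-sample error measure
    '_max_time': 600,                       # stop estimation after 600 seconds
    'distribution': ['Normal', 'Uniform'],  # candidate random-parameter distributions
}
obj_fun = ObjectiveFunction(X, y, **arguments)
```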
@@ -80,7 +93,7 @@ Note: Please consider the main arguments to change.
 
 
 
-### An Example of changing the arguments.
+### Example of changing the arguments:
 Modify the arguments according to your preferences using the commented code as a guide.
 
 
@@ -108,16 +121,18 @@ Listed below is an example of how to specify an initial solution within the framework.
 
 
 ```python
-#Model Decisions, Specify for Intial Optimization
+#Model Decisions: specify the initial solution that will be optimised.
 manual_fit_spec = {
     'fixed_terms': ['SINGLE', 'LENGTH'],
     'rdm_terms': ['AADT:normal'],
-    'rdm_cor_terms': ['GRADEBR:uniform', 'CURVES:triangular'],
+    'rdm_cor_terms': ['GRADEBR:normal', 'CURVES:normal'],
     'grouped_terms': [],
     'hetro_in_means': ['ACCESS:normal', 'MINRAD:normal'],
     'transformations': ['no', 'no', 'log', 'no', 'no', 'no', 'no'],
-    'dispersion': 1
+    'dispersion': 0
 }
+
+
 #Search Arguments
 arguments = {
     'algorithm': 'hs',
@@ -129,7 +144,47 @@ arguments = {
 obj_fun = ObjectiveFunction(X, y, **arguments)
 ```
 
-simarly to return the results feed the objective function into a metaheuristic solution algorithm. An example of this is provided below:
+    Setup Complete...
+    Benchmaking test with Seed 42
+    --------------------------------------------------------------------------------
+    Log-Likelihood: -1339.1862434675106
+    --------------------------------------------------------------------------------
+    bic: 2732.31
+    --------------------------------------------------------------------------------
+    MSE: 650856.32
+    +--------------------------+--------+-------+----------+----------+------------+
+    | Effect                   | $\tau$ | Coeff | Std. Err | z-values | Prob |z|>Z |
+    +==========================+========+=======+==========+==========+============+
+    | LENGTH                   | no     | -0.15 | 0.01     | -12.98   | 0.00***    |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | SINGLE                   | no     | -2.46 | 0.04     | -50.00   | 0.00***    |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | GRADEBR                  | log    | 4.23  | 0.10     | 42.17    | 0.00***    |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | CURVES                   | no     | 0.51  | 0.01     | 34.78    | 0.00***    |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | Chol: GRADEBR (Std.      |        | 2.21  | 0.00     | 50.00    | 0.00***    |
+    | Dev. normal) )           |        |       |          |          |            |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | Chol: CURVES (Std. Dev.  |        | -0.51 | 0.00     | -50.00   | 0.00***    |
+    | normal) )                |        |       |          |          |            |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | Chol: CURVES (Std. Dev.  | no     | 0.55  | 0.00     | 50.00    | 0.00***    |
+    | normal) . GRADEBR (Std.  |        |       |          |          |            |
+    | Dev. normal )            |        |       |          |          |            |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | main: MINRAD: hetro      | no     | -0.00 | 0.00     | -44.36   | 0.00***    |
+    | group 0                  |        |       |          |          |            |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | ACCESS: hetro group 0    |        | 0.68  | 0.09     | 7.68     | 0.00***    |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | main: MINRAD: hetro      |        | -0.00 | 0.00     | -44.86   | 0.00***    |
+    | group 0:normal:sd hetro  |        |       |          |          |            |
+    | group 0                  |        |       |          |          |            |
+    +--------------------------+--------+-------+----------+----------+------------+
+
+
+Similarly, to return the results, feed the objective function into a metaheuristic solution algorithm. An example of this is provided below:
 
 
 ```python
@@ -137,7 +192,7 @@ results = harmony_search(obj_fun)
 print(results)
 ```
 
-## Notes:
+# Notes:
 ### Capabilities of the software include:
 * Handling of Panel Data
 * Support for Data Transformations
@@ -155,11 +210,11 @@ Capability to handle heterogeneity in the means of the random parameters
 * Customization of Hyper-parameters to solve problems tailored to your dataset
 * Out-of-the-box optimization capability using default metaheuristics
 
-### Intreting the output of the model:
+### Interpreting the output of the model:
 A regression table is produced. The following text elements are explained:
 - Std. Dev.: This column appears for effects that are related to random parameters and displays the assumed distribution next to it.
 - Chol: This term refers to a Cholesky decomposition element, showing the correlation between two random parameters. The combination of the Cholesky element with itself is equivalent to a normal random parameter.
-- hetro group #: This term represents the heterogeneity group number, which refers all of the contributing factors that share hetrogentiy in the means to each other under the same numbered value.
+- hetro group: This term represents the heterogeneity group number, which refers to all of the contributing factors that share heterogeneity in the means with each other under the same numbered value.
 - $\tau$: This column displays the type of transformation that was applied to the specific contributing factor in the data.
 
 
@@ -211,10 +266,10 @@ The following list describes the arguments available in this function. By default,
 
 8. **`_max_time`**: This argument adds a termination time to the algorithm, in seconds. Note that the clock only starts after the initial population of solutions has been generated.
 
-# Example
+## Example: Assistance by Harmony Search
 
 
-Let's start by fitting very simple models, use those model sto help and define the objectives, then perform more of an extensive search on the variables that are identified more commonly
+Let's begin by fitting very simple models and use the structure of these models to define our objectives. Then, we can conduct a more extensive search on the variables that are more frequently identified. For instance, in the case below, the complexity is level 3, indicating that we will consider, at most, randomly correlated parameters. This approach is useful for initially identifying a suitable set of contributing factors for our search.
 
 
 
@@ -241,27 +296,30 @@ arguments = {
     '_max_time': 10000
 }
 obj_fun = ObjectiveFunction(X, y, **arguments)
-
 results = harmony_search(obj_fun)
 print(results)
 ```
 
+## Paper
+
+This tutorial accompanies our latest paper. A link to the current paper can be found here: [MetaCountRegressor](https://www.overleaf.com/read/mszwpwzcxsng#c5eb0c)
+
 ## Contact
 If you have any questions, ideas to improve MetaCountRegressor, or want to report a bug, just open a new issue in the [GitHub repository](https://github.com/zahern/CountDataEstimation).
 
 ## Citing MetaCountRegressor
 Please cite MetaCountRegressor as follows:
 
-Ahern, Z., Corry P., Paz A. (2023). MetaCountRegressor [Computer software]. [https://pypi.org/project/metacounregressor/](https://pypi.org/project/metacounregressor/)
+Ahern, Z., Corry P., Paz A. (2024). MetaCountRegressor [Computer software]. [https://pypi.org/project/metacounregressor/](https://pypi.org/project/metacounregressor/)
 
 Or using BibTeX as follows:
 
 ```bibtex
-@misc{Ahern2023,
-  author = {Zeke Ahern and Paul Corry and Alexander Paz},
+@misc{Ahern2024Meta,
+  author = {Zeke Ahern and Paul Corry and Alexander Paz},
   journal = {PyPi},
   title = {metacountregressor · PyPI},
-  url = {https://pypi.org/project/metacountregressor/0.1.47/},
-  year = {2023},
+  url = {https://pypi.org/project/metacountregressor/0.1.80/},
+  year = {2024},
 }
 
@@ -2,9 +2,18 @@
 
 ::
 
-    <img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width: 200px; margin-right: 20px;">
+    <img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width: 100px; margin-right: 20px;">
     <p><span style="font-size: 60px;"><strong>MetaCountRegressor</strong></span></p>
 
+Tutorial also available as a Jupyter notebook
+=============================================
+
+`Download Example
+Notebook <https://github.com/zahern/CountDataEstimation/blob/main/README.ipynb>`__
+
+The tutorial provides more extensive examples of how to run the code and
+perform experiments. Further documentation is currently in development.
+
 Quick Setup
 '''''''''''
 
@@ -28,6 +37,16 @@ Install ``metacountregressor`` using pip as follows:
     from metacountregressor.metaheuristics import (harmony_search,
                                                    differential_evolution,
                                                    simulated_annealing)
+
+
+
+
+.. parsed-literal::
+
+    loaded standard packages
+    loaded helper
+    testing
+
 
 Basic setup.
 ^^^^^^^^^^^^
@@ -52,7 +71,7 @@ the Pareto frontier.
 
     #some example arguments; these are the defaults, so the following line is just for clarity. See the later arguments section for details.
     arguments = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6, 'instance_number':1,
-                 'val_percentage':0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 6}
+                 'val_percentage':0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "_max_time": 6}
     # Fit the model with metacountregressor
     obj_fun = ObjectiveFunction(X, y, **arguments)
     #replace with other metaheuristics if desired
@@ -80,7 +99,7 @@ Note: Please consider the main arguments to change.
   complexities are further explained later in this document.
 - ``instance_number``: This parameter is used to give a name to the
   outputs.
-- ``obj_1``: This parameter has multiple choices for obj_1, such as
+- ``_obj_1``: This parameter has multiple choices for obj_1, such as
   'bic', 'aic', and 'hqic'. Only one choice should be defined as a
   string value.
 - ``_obj_2``: This parameter has multiple choices for objective 2, such
@@ -103,8 +122,8 @@ Note: Please consider the main arguments to change.
   valid options include: 'Normal', 'LnNormal', 'Triangular', and
   'Uniform'.
 
-An Example of changing the arguments.
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Example of changing the arguments:
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Modify the arguments according to your preferences using the commented
 code as a guide.
@@ -139,16 +158,18 @@ modeling components may completely replace the initial solution.
 
 .. code:: ipython3
 
-    #Model Decisions, Specify for Intial Optimization
+    #Model Decisions: specify the initial solution that will be optimised.
     manual_fit_spec = {
         'fixed_terms': ['SINGLE', 'LENGTH'],
         'rdm_terms': ['AADT:normal'],
-        'rdm_cor_terms': ['GRADEBR:uniform', 'CURVES:triangular'],
+        'rdm_cor_terms': ['GRADEBR:normal', 'CURVES:normal'],
         'grouped_terms': [],
         'hetro_in_means': ['ACCESS:normal', 'MINRAD:normal'],
         'transformations': ['no', 'no', 'log', 'no', 'no', 'no', 'no'],
-        'dispersion': 1
+        'dispersion': 0
     }
+
+
     #Search Arguments
     arguments = {
         'algorithm': 'hs',
@@ -159,7 +180,50 @@ modeling components may completely replace the initial solution.
     }
     obj_fun = ObjectiveFunction(X, y, **arguments)
 
-simarly to return the results feed the objective function into a
+
+.. parsed-literal::
+
+    Setup Complete...
+    Benchmaking test with Seed 42
+    --------------------------------------------------------------------------------
+    Log-Likelihood: -1339.1862434675106
+    --------------------------------------------------------------------------------
+    bic: 2732.31
+    --------------------------------------------------------------------------------
+    MSE: 650856.32
+    +--------------------------+--------+-------+----------+----------+------------+
+    | Effect                   | $\tau$ | Coeff | Std. Err | z-values | Prob |z|>Z |
+    +==========================+========+=======+==========+==========+============+
+    | LENGTH                   | no     | -0.15 | 0.01     | -12.98   | 0.00***    |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | SINGLE                   | no     | -2.46 | 0.04     | -50.00   | 0.00***    |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | GRADEBR                  | log    | 4.23  | 0.10     | 42.17    | 0.00***    |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | CURVES                   | no     | 0.51  | 0.01     | 34.78    | 0.00***    |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | Chol: GRADEBR (Std.      |        | 2.21  | 0.00     | 50.00    | 0.00***    |
+    | Dev. normal) )           |        |       |          |          |            |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | Chol: CURVES (Std. Dev.  |        | -0.51 | 0.00     | -50.00   | 0.00***    |
+    | normal) )                |        |       |          |          |            |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | Chol: CURVES (Std. Dev.  | no     | 0.55  | 0.00     | 50.00    | 0.00***    |
+    | normal) . GRADEBR (Std.  |        |       |          |          |            |
+    | Dev. normal )            |        |       |          |          |            |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | main: MINRAD: hetro      | no     | -0.00 | 0.00     | -44.36   | 0.00***    |
+    | group 0                  |        |       |          |          |            |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | ACCESS: hetro group 0    |        | 0.68  | 0.09     | 7.68     | 0.00***    |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | main: MINRAD: hetro      |        | -0.00 | 0.00     | -44.86   | 0.00***    |
+    | group 0:normal:sd hetro  |        |       |          |          |            |
+    | group 0                  |        |       |          |          |            |
+    +--------------------------+--------+-------+----------+----------+------------+
+
+
+Similarly, to return the results, feed the objective function into a
 metaheuristic solution algorithm. An example of this is provided below:
 
 .. code:: ipython3
@@ -168,7 +232,7 @@ metaheuristic solution algorithm. An example of this is provided below:
     print(results)
 
 Notes:
-------
+======
 
 Capabilities of the software include:
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -199,8 +263,8 @@ Capabilities of the software include:
   dataset
 - Out-of-the-box optimization capability using default metaheuristics
 
-Intreting the output of the model:
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Interpreting the output of the model:
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 A regression table is produced. The following text elements are
 explained: - Std. Dev.: This column appears for effects that are related
@@ -208,7 +272,7 @@ to random parameters and displays the assumed distribution
 next to it - Chol: This term refers to a Cholesky decomposition element,
 showing the correlation between two random parameters. The combination
 of the Cholesky element with itself is equivalent to a normal random
-parameter. - hetro group #: This term represents the heterogeneity group
+parameter. - hetro group: This term represents the heterogeneity group
 number, which refers to all of the contributing factors that share
 heterogeneity in the means with each other under the same numbered value. -
 :math:`\tau`: This column displays the type of transformation that was
@@ -299,12 +363,16 @@ considered. Example code will be provided later in this guide.
 dependent on the time after the initial population of solutions is
 generated.
 
-Example
-=======
+Example: Assistance by Harmony Search
+-------------------------------------
 
-Let's start by fitting very simple models, use those model sto help
-and define the objectives, then perform more of an extensive search on the
-variables that are identified more commonly
+Let's begin by fitting very simple models and use the structure of these
+models to define our objectives. Then, we can conduct a more extensive
+search on the variables that are more frequently identified. For
+instance, in the case below, the complexity is level 3, indicating that
+we will consider, at most, randomly correlated parameters. This approach
+is useful for initially identifying a suitable set of contributing
+factors for our search.
 
 .. code:: ipython3
 
@@ -330,10 +398,16 @@ variables that are identified more commonly
        '_max_time': 10000
     }
     obj_fun = ObjectiveFunction(X, y, **arguments)
-
     results = harmony_search(obj_fun)
     print(results)
 
+Paper
+-----
+
+This tutorial accompanies our latest paper. A link to the
+current paper can be found here:
+`MetaCountRegressor <https://www.overleaf.com/read/mszwpwzcxsng#c5eb0c>`__
+
 Contact
 -------
@@ -346,12 +420,12 @@ Citing MetaCountRegressor
 
 Please cite MetaCountRegressor as follows:
 
-Ahern, Z., Corry P., Paz A. (2023). MetaCountRegressor [Computer
+Ahern, Z., Corry P., Paz A. (2024). MetaCountRegressor [Computer
 software]. https://pypi.org/project/metacounregressor/
 
 Or using BibTeX as follows:
 
-\```bibtex @misc{Ahern2023, author = {Zeke Ahern and Paul Corry and
+\```bibtex @misc{Ahern2024Meta, author = {Zeke Ahern and Paul Corry and
 Alexander Paz}, journal = {PyPi}, title = {metacountregressor · PyPI},
-url = {https://pypi.org/project/metacountregressor/0.1.47/}, year =
-{2023}, }
+url = {https://pypi.org/project/metacountregressor/0.1.80/}, year =
+{2024}, }
@@ -0,0 +1,90 @@
+import numpy as np
+import pandas as pd
+
+
+class DataProcessor:
+    """Splits x/y data into training and test sets, optionally by panel ID."""
+
+    def __init__(self, x_data, y_data, kwargs):
+        self._obj_1 = kwargs.get('_obj_1')
+        self._obj_2 = kwargs.get('_obj_2')
+        self.test_percentage = float(kwargs.get('test_percentage', 0))
+        self.val_percentage = float(kwargs.get('val_percentage', 0))
+        self.is_multi = self.test_percentage != 0
+        self._x_data = x_data
+        self._y_data = y_data
+        self._process_data(kwargs)
+
+    def _process_data(self, kwargs):
+        # Hold out test data only when an out-of-sample objective is requested.
+        if self._obj_1 == 'MAE' or self._obj_2 in ["MAE", 'RMSE', 'MSE', 'RMSE_IN', 'RMSE_TEST']:
+            self._handle_special_conditions(kwargs)
+        else:
+            self._standard_data_partition()
+
+        self._characteristics_names = list(self._x_data.columns)
+        self._max_group_all_means = 1
+        self._exclude_this_test = [4]
+
+    def _handle_special_conditions(self, kwargs):
+        if 'panels' in kwargs:
+            self._process_panels_data(kwargs)
+        else:
+            self._standard_data_partition()
+
+    def _process_panels_data(self, kwargs):
+        group_key = kwargs['group']
+        panels_key = kwargs['panels']
+
+        # Encode groups as integer codes and re-rank panel IDs densely from 1.
+        self._x_data[group_key] = self._x_data[group_key].astype('category').cat.codes
+        try:
+            self._x_data[panels_key] = self._x_data[panels_key].rank(method='dense').astype(int)
+            self._x_data[panels_key] -= self._x_data[panels_key].min() - 1
+        except KeyError:
+            pass
+
+        # Split by panel ID so observations from one panel never straddle the split.
+        unique_ids = np.unique(self._x_data[panels_key])
+        training_size = int((1 - self.test_percentage - self.val_percentage) * len(unique_ids))
+        training_ids = np.random.choice(unique_ids, training_size, replace=False)
+
+        train_idx = self._x_data.index[self._x_data[panels_key].isin(training_ids)]
+        test_idx = self._x_data.index[~self._x_data[panels_key].isin(training_ids)]
+
+        self._create_datasets(train_idx, test_idx)
+
+    def _standard_data_partition(self):
+        # Plain row-wise split when no panel structure is supplied.
+        total_samples = len(self._x_data)
+        training_size = int((1 - self.test_percentage - self.val_percentage) * total_samples)
+        training_indices = np.random.choice(total_samples, training_size, replace=False)
+
+        train_idx = np.sort(training_indices)
+        test_idx = np.setdiff1d(np.arange(total_samples), training_indices)
+
+        self._create_datasets(train_idx, test_idx)
+
+    def _create_datasets(self, train_idx, test_idx):
+        self.df_train = self._x_data.loc[train_idx, :]
+        self.df_test = self._x_data.loc[test_idx, :]
+        self.y_train = self._y_data.loc[train_idx, :]
+        self.y_test = self._y_data.loc[test_idx, :]
+
+        self._x_data_test = self.df_test.copy()
+        self._y_data_test = self.y_test.astype('float').copy()
+        self._x_data = self.df_train.copy()
+        self._y_data = self.y_train.astype('float').copy()
+
+        # Handle different shapes
+        if self._x_data.ndim == 2:  # typical DataFrame
+            self._samples, self._characteristics = self._x_data.shape
+            self._panels = None
+        elif self._x_data.ndim == 3:  # 3D structure, e.g., a panel-shaped array
+            self._samples, self._panels, self._characteristics = self._x_data.shape
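For context, here is a minimal usage sketch of the new `DataProcessor`. The toy frame and column names are hypothetical; the class, its attributes, and the expected keyword dictionary are taken from the code above. Note that the dictionary is passed as a plain third positional argument, not as `**kwargs`.

```python
import numpy as np
import pandas as pd

from metacountregressor.data_split_helper import DataProcessor

# Toy count data; the column names are hypothetical.
rng = np.random.default_rng(42)
X = pd.DataFrame({'AADT': rng.integers(100, 10000, size=200),
                  'LENGTH': rng.random(200)})
y = pd.DataFrame({'FREQ': rng.poisson(3.0, size=200)})

# An out-of-sample objective such as 'RMSE_TEST' triggers the held-out split;
# without a 'panels' key the standard row-wise partition is used.
kwargs = {'_obj_1': 'bic', '_obj_2': 'RMSE_TEST',
          'test_percentage': 0.15, 'val_percentage': 0.15}
proc = DataProcessor(X, y, kwargs)
print(proc.df_train.shape, proc.df_test.shape)  # roughly a 70% / 30% row split
```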
@@ -5,6 +5,121 @@ import matplotlib.pyplot as plt
 
 plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-dark.mplstyle')
 
+## Select the best features based on univariate F-scores (SelectKBest).
+def select_features(X_train, y_train, n_f=16):
+    try:
+        from sklearn.feature_selection import SelectKBest
+        from sklearn.feature_selection import f_regression
+        feature_names = X_train.columns
+        # configure to select the n_f highest-scoring features
+        fs = SelectKBest(score_func=f_regression, k=n_f)
+
+        # learn relationship from training data
+        fs.fit(X_train, y_train)
+
+        mask = fs.get_support()  # Boolean array of selected features
+        selected_features = [feature for keep, feature in zip(mask, feature_names) if keep]
+        X_train = X_train[selected_features]
+    except ImportError:
+        print('import error, not performing feature selection')
+        fs = X_train.columns  # fall back to returning the column names unchanged
+
+    return X_train, fs
+
+
+# Cuts off correlated data.
+def findCorrelation(corr, cutoff=0.9, exact=None):
+    """
+    This function is the Python implementation of the R function
+    `findCorrelation()`.
+
+    Relies on numpy and pandas, so must have them pre-installed.
+
+    It searches through a correlation matrix and returns a list of column names
+    to remove to reduce pairwise correlations.
+
+    For the documentation of the R function, see
+    https://www.rdocumentation.org/packages/caret/topics/findCorrelation
+    and for the source code of `findCorrelation()`, see
+    https://github.com/topepo/caret/blob/master/pkg/caret/R/findCorrelation.R
+
+    -----------------------------------------------------------------------------
+    Parameters:
+    -----------
+    corr: pandas dataframe.
+        A correlation matrix as a pandas dataframe.
+    cutoff: float, default: 0.9.
+        A numeric value for the pairwise absolute correlation cutoff
+    exact: bool, default: None
+        A boolean value that determines whether the average correlations are
+        recomputed at each step
+    -----------------------------------------------------------------------------
+    Returns:
+    --------
+    list of column names
+    -----------------------------------------------------------------------------
+    Example:
+    --------
+    R1 = pd.DataFrame({
+        'x1': [1.0, 0.86, 0.56, 0.32, 0.85],
+        'x2': [0.86, 1.0, 0.01, 0.74, 0.32],
+        'x3': [0.56, 0.01, 1.0, 0.65, 0.91],
+        'x4': [0.32, 0.74, 0.65, 1.0, 0.36],
+        'x5': [0.85, 0.32, 0.91, 0.36, 1.0]
+    }, index=['x1', 'x2', 'x3', 'x4', 'x5'])
+
+    findCorrelation(R1, cutoff=0.6, exact=False)  # ['x4', 'x5', 'x1', 'x3']
+    findCorrelation(R1, cutoff=0.6, exact=True)   # ['x1', 'x5', 'x4']
+    """
+
+    def _findCorrelation_fast(corr, avg, cutoff):
+        # Index pairs in the upper triangle whose correlation exceeds the cutoff.
+        combsAboveCutoff = corr.where(lambda x: (np.tril(x) == 0) & (x > cutoff)).stack().index
+
+        rowsToCheck = combsAboveCutoff.get_level_values(0)
+        colsToCheck = combsAboveCutoff.get_level_values(1)
+
+        # From each offending pair, drop the member with the larger average correlation.
+        msk = avg[colsToCheck] > avg[rowsToCheck].values
+        deletecol = pd.unique(np.r_[colsToCheck[msk], rowsToCheck[~msk]]).tolist()
+
+        return deletecol
+
+    def _findCorrelation_exact(corr, avg, cutoff):
+        # Reorder rows/columns by decreasing average correlation.
+        x = corr.loc[(*[avg.sort_values(ascending=False).index] * 2,)]
+
+        if (x.dtypes.values[:, None] == ['int64', 'int32', 'int16', 'int8']).any():
+            x = x.astype(float)
+
+        # Blank the diagonal so self-correlations are ignored.
+        x.values[(*[np.arange(len(x))] * 2,)] = np.nan
+
+        deletecol = []
+        for ix, i in enumerate(x.columns[:-1]):
+            for j in x.columns[ix + 1:]:
+                if x.loc[i, j] > cutoff:
+                    if x[i].mean() > x[j].mean():
+                        deletecol.append(i)
+                        x.loc[i] = x[i] = np.nan
+                    else:
+                        deletecol.append(j)
+                        x.loc[j] = x[j] = np.nan
+
+        return deletecol
+
+    # Dispatch: the exact variant recomputes averages as columns are removed.
+    avg = corr.abs().mean()
+    if exact or (exact is None and corr.shape[0] < 100):
+        return _findCorrelation_exact(corr.abs(), avg, cutoff)
+    return _findCorrelation_fast(corr.abs(), avg, cutoff)
+
+
+def clean_data_types(df):
+    """Coerce object columns to numeric (non-convertible values become NaN)."""
+    for col in df.columns:
+        if df[col].dtype == 'object':
+            # Attempt to convert the column to numeric type
+            df[col] = pd.to_numeric(df[col], errors='coerce')
+    return df
+
 
 def drop_correlations(x_df, percentage=0.85):
     cor_matrix = x_df.corr().abs()
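Taken together, the helpers added to `helperprocess.py` support a small preprocessing pipeline. A usage sketch under the corrected versions above (the toy data and column names are hypothetical; the functions are the module's own):

```python
import pandas as pd
from metacountregressor import helperprocess

# Toy design matrix: a stringly-typed column plus a near-duplicate column.
df = pd.DataFrame({'AADT': ['100', '250', '400', '800', '950', '300'],
                   'LENGTH': [1.0, 2.5, 0.7, 3.1, 2.2, 1.4]})
df['LENGTH_COPY'] = df['LENGTH'] * 0.99          # perfectly correlated with LENGTH
y = pd.Series([0, 2, 1, 5, 4, 1])

df = helperprocess.clean_data_types(df)          # object column -> numeric
to_drop = helperprocess.findCorrelation(df.corr(), cutoff=0.9)
df = df.drop(columns=to_drop)                    # drop one of each correlated pair
X, fs = helperprocess.select_features(df, y, n_f=2)
print(X.columns.tolist())
```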