metacountregressor 0.1.76__tar.gz → 0.1.91__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/PKG-INFO +78 -20
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/README.rst +97 -23
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/metacountregressor/main.py +73 -8
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/metacountregressor/solution.py +158 -110
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/metacountregressor.egg-info/PKG-INFO +78 -20
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/metacountregressor.egg-info/requires.txt +1 -0
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/setup.py +27 -3
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/LICENSE.txt +0 -0
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/metacountregressor/__init__.py +0 -0
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/metacountregressor/_device_cust.py +0 -0
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/metacountregressor/data_split_helper.py +0 -0
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/metacountregressor/halton.py +0 -0
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/metacountregressor/helperprocess.py +0 -0
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/metacountregressor/main_old.py +0 -0
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/metacountregressor/metaheuristics.py +0 -0
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/metacountregressor/pareto_file.py +0 -0
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/metacountregressor/pareto_logger__plot.py +0 -0
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/metacountregressor/setup.py +0 -0
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/metacountregressor/single_objective_finder.py +0 -0
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/metacountregressor/test_generated_paper2.py +0 -0
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/metacountregressor.egg-info/SOURCES.txt +0 -0
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/metacountregressor.egg-info/dependency_links.txt +0 -0
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/metacountregressor.egg-info/not-zip-safe +0 -0
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/metacountregressor.egg-info/top_level.txt +0 -0
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/setup.cfg +0 -0
- {metacountregressor-0.1.76 → metacountregressor-0.1.91}/tests/test.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: metacountregressor
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.91
|
4
4
|
Summary: Extensions for a Python package for estimation of count models.
|
5
5
|
Home-page: https://github.com/zahern/CountDataEstimation
|
6
6
|
Author: Zeke Ahern
|
@@ -11,12 +11,18 @@ Description-Content-Type: text/markdown
|
|
11
11
|
License-File: LICENSE.txt
|
12
12
|
Requires-Dist: numpy>=1.13.1
|
13
13
|
Requires-Dist: scipy>=1.0.0
|
14
|
+
Requires-Dist: requests
|
14
15
|
|
15
16
|
<div style="display: flex; align-items: center;">
|
16
|
-
<img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width:
|
17
|
+
<img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width: 100px; margin-right: 20px;">
|
17
18
|
<p><span style="font-size: 60px;"><strong>MetaCountRegressor</strong></span></p>
|
18
19
|
</div>
|
19
20
|
|
21
|
+
# Tutorial also available as a jupyter notebook
|
22
|
+
[Download Example Notebook](https://github.com/zahern/CountDataEstimation/blob/main/Tutorial.ipynb)
|
23
|
+
|
24
|
+
The tutorial provides more extensive examples on how to run the code and perform experiments. Further documentation is currently in development.
|
25
|
+
|
20
26
|
##### Quick Setup
|
21
27
|
The code below demonstrates how to set up automatic optimization assisted by the harmony search algorithm. Differential Evolution and Simulated Annealing are also referenced (change accordingly).
|
22
28
|
|
@@ -35,8 +41,15 @@ from metacountregressor.solution import ObjectiveFunction
|
|
35
41
|
from metacountregressor.metaheuristics import (harmony_search,
|
36
42
|
differential_evolution,
|
37
43
|
simulated_annealing)
|
44
|
+
|
45
|
+
|
38
46
|
```
|
39
47
|
|
48
|
+
loaded standard packages
|
49
|
+
loaded helper
|
50
|
+
testing
|
51
|
+
|
52
|
+
|
40
53
|
#### Basic setup.
|
41
54
|
The initial setup involves reading in the data and selecting an optimization algorithm. As the runtime progresses, new solutions will be continually evaluated. Finally, at the end of the runtime, the best solution will be identified and printed out. In the case of multiple objectives all of the best solutions will be printed out that belong to the Pareto frontier.
|
42
55
|
|
@@ -53,7 +66,7 @@ X = df.drop(columns=['FREQ', 'ID', 'AADT'])
|
|
53
66
|
|
54
67
|
#some example arguments; these are the defaults, so the following line is just for clarity. See the later arguments section for details.
|
55
68
|
arguments = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6, 'instance_number':1,
|
56
|
-
'val_percentage':0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "
|
69
|
+
'val_percentage':0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "_max_time": 6}
|
57
70
|
# Fit the model with metacountregressor
|
58
71
|
obj_fun = ObjectiveFunction(X, y, **arguments)
|
59
72
|
#replace with other metaheuristics if desired
|
@@ -71,7 +84,7 @@ Note: Please Consider the main arguments to change.
|
|
71
84
|
- `val_percentage`: This parameter represents the percentage of data used to validate the model. The value 0.15 corresponds to 15% of the data.
|
72
85
|
- `test_complexity`: This parameter defines the complexity level for testing. The value 6 tests all complexities. Alternatively, you can provide a list of numbers to consider different complexities. The complexities are further explained later in this document.
|
73
86
|
- `instance_number`: This parameter is used to give a name to the outputs.
|
74
|
-
- `
|
87
|
+
- `_obj_1`: This parameter has multiple choices for obj_1, such as 'bic', 'aic', and 'hqic'. Only one choice should be defined as a string value.
|
75
88
|
- `_obj_2`: This parameter has multiple choices for objective 2, such as 'RMSE_TEST', 'MSE_TEST', and 'MAE_TEST'.
|
76
89
|
- `_max_time`: This parameter specifies the maximum number of seconds for the total estimation before stopping.
|
77
90
|
- `distribution`: This parameter is a list of distributions to consider. Please select all of the available options and put them into a list of valid options if you want to consider the distribution type for use when modelling with random parameters. The valid options include: 'Normal', 'LnNormal', 'Triangular', and 'Uniform'.
|
@@ -80,7 +93,7 @@ Note: Please Consider the main arguments to change.
|
|
80
93
|
|
81
94
|
|
82
95
|
|
83
|
-
###
|
96
|
+
### Example of changing the arguments:
|
84
97
|
Modify the arguments according to your preferences using the commented code as a guide.
|
85
98
|
|
86
99
|
|
@@ -108,16 +121,18 @@ Listed below is an example of how to specify an initial solution within the fram
|
|
108
121
|
|
109
122
|
|
110
123
|
```python
|
111
|
-
#Model Decisions, Specify for
|
124
|
+
#Model Decisions, Specify for initial solution that will be optimised.
|
112
125
|
manual_fit_spec = {
|
113
126
|
'fixed_terms': ['SINGLE', 'LENGTH'],
|
114
127
|
'rdm_terms': ['AADT:normal'],
|
115
|
-
'rdm_cor_terms': ['GRADEBR:
|
128
|
+
'rdm_cor_terms': ['GRADEBR:normal', 'CURVES:normal'],
|
116
129
|
'grouped_terms': [],
|
117
130
|
'hetro_in_means': ['ACCESS:normal', 'MINRAD:normal'],
|
118
131
|
'transformations': ['no', 'no', 'log', 'no', 'no', 'no', 'no'],
|
119
|
-
'dispersion':
|
132
|
+
'dispersion': 0
|
120
133
|
}
|
134
|
+
|
135
|
+
|
121
136
|
#Search Arguments
|
122
137
|
arguments = {
|
123
138
|
'algorithm': 'hs',
|
@@ -129,7 +144,47 @@ arguments = {
|
|
129
144
|
obj_fun = ObjectiveFunction(X, y, **arguments)
|
130
145
|
```
|
131
146
|
|
132
|
-
|
147
|
+
Setup Complete...
|
148
|
+
Benchmaking test with Seed 42
|
149
|
+
--------------------------------------------------------------------------------
|
150
|
+
Log-Likelihood: -1339.1862434675106
|
151
|
+
--------------------------------------------------------------------------------
|
152
|
+
bic: 2732.31
|
153
|
+
--------------------------------------------------------------------------------
|
154
|
+
MSE: 650856.32
|
155
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
156
|
+
| Effect | $\tau$ | Coeff | Std. Err | z-values | Prob |z|>Z |
|
157
|
+
+==========================+========+=======+==========+==========+============+
|
158
|
+
| LENGTH | no | -0.15 | 0.01 | -12.98 | 0.00*** |
|
159
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
160
|
+
| SINGLE | no | -2.46 | 0.04 | -50.00 | 0.00*** |
|
161
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
162
|
+
| GRADEBR | log | 4.23 | 0.10 | 42.17 | 0.00*** |
|
163
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
164
|
+
| CURVES | no | 0.51 | 0.01 | 34.78 | 0.00*** |
|
165
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
166
|
+
| Chol: GRADEBR (Std. | | 2.21 | 0.00 | 50.00 | 0.00*** |
|
167
|
+
| Dev. normal) ) | | | | | |
|
168
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
169
|
+
| Chol: CURVES (Std. Dev. | | -0.51 | 0.00 | -50.00 | 0.00*** |
|
170
|
+
| normal) ) | | | | | |
|
171
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
172
|
+
| Chol: CURVES (Std. Dev. | no | 0.55 | 0.00 | 50.00 | 0.00*** |
|
173
|
+
| normal) . GRADEBR (Std. | | | | | |
|
174
|
+
| Dev. normal ) | | | | | |
|
175
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
176
|
+
| main: MINRAD: hetro | no | -0.00 | 0.00 | -44.36 | 0.00*** |
|
177
|
+
| group 0 | | | | | |
|
178
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
179
|
+
| ACCESS: hetro group 0 | | 0.68 | 0.09 | 7.68 | 0.00*** |
|
180
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
181
|
+
| main: MINRAD: hetro | | -0.00 | 0.00 | -44.86 | 0.00*** |
|
182
|
+
| group 0:normal:sd hetro | | | | | |
|
183
|
+
| group 0 | | | | | |
|
184
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
185
|
+
|
186
|
+
|
187
|
+
Similarly, to return the results, feed the objective function into a metaheuristic solution algorithm. An example of this is provided below:
|
133
188
|
|
134
189
|
|
135
190
|
```python
|
@@ -137,7 +192,7 @@ results = harmony_search(obj_fun)
|
|
137
192
|
print(results)
|
138
193
|
```
|
139
194
|
|
140
|
-
|
195
|
+
# Notes:
|
141
196
|
### Capabilities of the software include:
|
142
197
|
* Handling of Panel Data
|
143
198
|
* Support for Data Transformations
|
@@ -155,11 +210,11 @@ Capability to handle heterogeneity in the means of the random parameters
|
|
155
210
|
* Customization of Hyper-parameters to solve problems tailored to your dataset
|
156
211
|
* Out-of-the-box optimization capability using default metaheuristics
|
157
212
|
|
158
|
-
###
|
213
|
+
### Interpreting the output of the model:
|
159
214
|
A regression table is produced. The following text elements are explained:
|
160
215
|
- Std. Dev.: This column appears for effects that are related to random parameters and displays the assumed distribution next to it
|
161
216
|
- Chol: This term refers to a Cholesky decomposition element, which shows the correlation between two random parameters. The combination of the Cholesky element on itself is equivalent to a normal random parameter.
|
162
|
-
- hetro group
|
217
|
+
- hetro group: This term represents the heterogeneity group number, which refers to all of the contributing factors that share heterogeneity in the means with each other under the same numbered value.
|
163
218
|
- $\tau$: This column, displays the type of transformation that was applied to the specific contributing factor in the data.
|
164
219
|
|
165
220
|
|
@@ -211,10 +266,10 @@ The following list describes the arguments available in this function. By defaul
|
|
211
266
|
|
212
267
|
8. **`_max_time`**: This argument is used to add a termination time in the algorithm. It takes values in seconds. Note that the time is only dependent on the time elapsed after the initial population of solutions is generated.
|
213
268
|
|
214
|
-
|
269
|
+
## Example: Assistance by Harmony Search
|
215
270
|
|
216
271
|
|
217
|
-
Let's
|
272
|
+
Let's begin by fitting very simple models and use the structure of these models to define our objectives. Then, we can conduct a more extensive search on the variables that are more frequently identified. For instance, in the case below, the complexity is level 3, indicating that we will consider, at most, randomly correlated parameters. This approach is useful for initially identifying a suitable set of contributing factors for our search.
|
218
273
|
|
219
274
|
|
220
275
|
|
@@ -241,27 +296,30 @@ arguments = {
|
|
241
296
|
'_max_time': 10000
|
242
297
|
}
|
243
298
|
obj_fun = ObjectiveFunction(X, y, **arguments)
|
244
|
-
|
245
299
|
results = harmony_search(obj_fun)
|
246
300
|
print(results)
|
247
301
|
```
|
248
302
|
|
303
|
+
## Paper
|
304
|
+
|
305
|
+
The following tutorial is in conjunction with our latest paper. A link to the current paper can be found here [MetaCountRegressor](https://www.overleaf.com/read/mszwpwzcxsng#c5eb0c)
|
306
|
+
|
249
307
|
## Contact
|
250
308
|
If you have any questions, ideas to improve MetaCountRegressor, or want to report a bug, just open a new issue in [GitHub repository](https://github.com/zahern/CountDataEstimation).
|
251
309
|
|
252
310
|
## Citing MetaCountRegressor
|
253
311
|
Please cite MetaCountRegressor as follows:
|
254
312
|
|
255
|
-
Ahern, Z., Corry P., Paz A. (
|
313
|
+
Ahern, Z., Corry P., Paz A. (2024). MetaCountRegressor [Computer software]. [https://pypi.org/project/metacountregressor/](https://pypi.org/project/metacountregressor/)
|
256
314
|
|
257
315
|
Or using BibTex as follows:
|
258
316
|
|
259
317
|
```bibtex
|
260
|
-
@misc{
|
261
|
-
author = {Zeke Ahern
|
318
|
+
@misc{Ahern2024Meta,
|
319
|
+
author = {Zeke Ahern and Paul Corry and Alexander Paz},
|
262
320
|
journal = {PyPi},
|
263
321
|
title = {metacountregressor · PyPI},
|
264
|
-
url = {https://pypi.org/project/metacountregressor/0.1.
|
265
|
-
year = {
|
322
|
+
url = {https://pypi.org/project/metacountregressor/0.1.80/},
|
323
|
+
year = {2024},
|
266
324
|
}
|
267
325
|
|
@@ -2,9 +2,18 @@
|
|
2
2
|
|
3
3
|
::
|
4
4
|
|
5
|
-
<img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width:
|
5
|
+
<img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width: 100px; margin-right: 20px;">
|
6
6
|
<p><span style="font-size: 60px;"><strong>MetaCountRegressor</strong></span></p>
|
7
7
|
|
8
|
+
Tutorial also available as a jupyter notebook
|
9
|
+
=============================================
|
10
|
+
|
11
|
+
`Download Example
|
12
|
+
Notebook <https://github.com/zahern/CountDataEstimation/blob/main/README.ipynb>`__
|
13
|
+
|
14
|
+
The tutorial provides more extensive examples on how to run the code and
|
15
|
+
perform experiments. Further documentation is currently in development.
|
16
|
+
|
8
17
|
Quick Setup
|
9
18
|
'''''''''''
|
10
19
|
|
@@ -28,6 +37,16 @@ Install ``metacountregressor`` using pip as follows:
|
|
28
37
|
from metacountregressor.metaheuristics import (harmony_search,
|
29
38
|
differential_evolution,
|
30
39
|
simulated_annealing)
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
|
44
|
+
.. parsed-literal::
|
45
|
+
|
46
|
+
loaded standard packages
|
47
|
+
loaded helper
|
48
|
+
testing
|
49
|
+
|
31
50
|
|
32
51
|
Basic setup.
|
33
52
|
^^^^^^^^^^^^
|
@@ -52,7 +71,7 @@ the Pareto frontier.
|
|
52
71
|
|
53
72
|
#some example arguments; these are the defaults, so the following line is just for clarity. See the later arguments section for details.
|
54
73
|
arguments = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6, 'instance_number':1,
|
55
|
-
'val_percentage':0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "
|
74
|
+
'val_percentage':0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "_max_time": 6}
|
56
75
|
# Fit the model with metacountregressor
|
57
76
|
obj_fun = ObjectiveFunction(X, y, **arguments)
|
58
77
|
#replace with other metaheuristics if desired
|
@@ -80,7 +99,7 @@ Note: Please Consider the main arguments to change.
|
|
80
99
|
complexities are further explained later in this document.
|
81
100
|
- ``instance_number``: This parameter is used to give a name to the
|
82
101
|
outputs.
|
83
|
-
- ``
|
102
|
+
- ``_obj_1``: This parameter has multiple choices for obj_1, such as
|
84
103
|
'bic', 'aic', and 'hqic'. Only one choice should be defined as a
|
85
104
|
string value.
|
86
105
|
- ``_obj_2``: This parameter has multiple choices for objective 2, such
|
@@ -103,8 +122,8 @@ Note: Please Consider the main arguments to change.
|
|
103
122
|
valid options include: 'Normal', 'LnNormal', 'Triangular', and
|
104
123
|
'Uniform'.
|
105
124
|
|
106
|
-
|
107
|
-
|
125
|
+
Example of changing the arguments:
|
126
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
108
127
|
|
109
128
|
Modify the arguments according to your preferences using the commented
|
110
129
|
code as a guide.
|
@@ -139,16 +158,18 @@ modeling components may completely replace the initial solution.
|
|
139
158
|
|
140
159
|
.. code:: ipython3
|
141
160
|
|
142
|
-
#Model Decisions, Specify for
|
161
|
+
#Model Decisions, Specify for initial solution that will be optimised.
|
143
162
|
manual_fit_spec = {
|
144
163
|
'fixed_terms': ['SINGLE', 'LENGTH'],
|
145
164
|
'rdm_terms': ['AADT:normal'],
|
146
|
-
'rdm_cor_terms': ['GRADEBR:
|
165
|
+
'rdm_cor_terms': ['GRADEBR:normal', 'CURVES:normal'],
|
147
166
|
'grouped_terms': [],
|
148
167
|
'hetro_in_means': ['ACCESS:normal', 'MINRAD:normal'],
|
149
168
|
'transformations': ['no', 'no', 'log', 'no', 'no', 'no', 'no'],
|
150
|
-
'dispersion':
|
169
|
+
'dispersion': 0
|
151
170
|
}
|
171
|
+
|
172
|
+
|
152
173
|
#Search Arguments
|
153
174
|
arguments = {
|
154
175
|
'algorithm': 'hs',
|
@@ -159,7 +180,50 @@ modeling components may completely replace the initial solution.
|
|
159
180
|
}
|
160
181
|
obj_fun = ObjectiveFunction(X, y, **arguments)
|
161
182
|
|
162
|
-
|
183
|
+
|
184
|
+
.. parsed-literal::
|
185
|
+
|
186
|
+
Setup Complete...
|
187
|
+
Benchmaking test with Seed 42
|
188
|
+
--------------------------------------------------------------------------------
|
189
|
+
Log-Likelihood: -1339.1862434675106
|
190
|
+
--------------------------------------------------------------------------------
|
191
|
+
bic: 2732.31
|
192
|
+
--------------------------------------------------------------------------------
|
193
|
+
MSE: 650856.32
|
194
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
195
|
+
| Effect | $\tau$ | Coeff | Std. Err | z-values | Prob |z|>Z |
|
196
|
+
+==========================+========+=======+==========+==========+============+
|
197
|
+
| LENGTH | no | -0.15 | 0.01 | -12.98 | 0.00*** |
|
198
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
199
|
+
| SINGLE | no | -2.46 | 0.04 | -50.00 | 0.00*** |
|
200
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
201
|
+
| GRADEBR | log | 4.23 | 0.10 | 42.17 | 0.00*** |
|
202
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
203
|
+
| CURVES | no | 0.51 | 0.01 | 34.78 | 0.00*** |
|
204
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
205
|
+
| Chol: GRADEBR (Std. | | 2.21 | 0.00 | 50.00 | 0.00*** |
|
206
|
+
| Dev. normal) ) | | | | | |
|
207
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
208
|
+
| Chol: CURVES (Std. Dev. | | -0.51 | 0.00 | -50.00 | 0.00*** |
|
209
|
+
| normal) ) | | | | | |
|
210
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
211
|
+
| Chol: CURVES (Std. Dev. | no | 0.55 | 0.00 | 50.00 | 0.00*** |
|
212
|
+
| normal) . GRADEBR (Std. | | | | | |
|
213
|
+
| Dev. normal ) | | | | | |
|
214
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
215
|
+
| main: MINRAD: hetro | no | -0.00 | 0.00 | -44.36 | 0.00*** |
|
216
|
+
| group 0 | | | | | |
|
217
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
218
|
+
| ACCESS: hetro group 0 | | 0.68 | 0.09 | 7.68 | 0.00*** |
|
219
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
220
|
+
| main: MINRAD: hetro | | -0.00 | 0.00 | -44.86 | 0.00*** |
|
221
|
+
| group 0:normal:sd hetro | | | | | |
|
222
|
+
| group 0 | | | | | |
|
223
|
+
+--------------------------+--------+-------+----------+----------+------------+
|
224
|
+
|
225
|
+
|
226
|
+
Similarly, to return the results, feed the objective function into a
|
163
227
|
metaheuristic solution algorithm. An example of this is provided below:
|
164
228
|
|
165
229
|
.. code:: ipython3
|
@@ -168,7 +232,7 @@ metaheuristic solution algorithm. An example of this is provided below:
|
|
168
232
|
print(results)
|
169
233
|
|
170
234
|
Notes:
|
171
|
-
|
235
|
+
======
|
172
236
|
|
173
237
|
Capabilities of the software include:
|
174
238
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
@@ -199,8 +263,8 @@ Capabilities of the software include:
|
|
199
263
|
dataset
|
200
264
|
- Out-of-the-box optimization capability using default metaheuristics
|
201
265
|
|
202
|
-
|
203
|
-
|
266
|
+
Interpreting the output of the model:
|
267
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
204
268
|
|
205
269
|
A regression table is produced. The following text elements are
|
206
270
|
explained: - Std. Dev.: This column appears for effects that are related
|
@@ -208,7 +272,7 @@ to random paramters and displays the assument distributional assumption
|
|
208
272
|
next to it - Chol: This term refers to Cholesky decomposition element,
|
209
273
|
to show the correlation between two random parameters. The combination
|
210
274
|
of the Cholesky element on itself is equivalent to a normal random
|
211
|
-
parameter. - hetro group
|
275
|
+
parameter. - hetro group: This term represents the heterogeneity group
|
212
276
|
number, which refers to all of the contributing factors that share
|
213
277
|
heterogeneity in the means with each other under the same numbered value. -
|
214
278
|
:math:`\tau`: This column, displays the type of transformation that was
|
@@ -299,12 +363,16 @@ considered. Example code will be provided later in this guide.
|
|
299
363
|
dependenant on the time after intial population of solutions are
|
300
364
|
generated.
|
301
365
|
|
302
|
-
Example
|
303
|
-
|
366
|
+
Example: Assistance by Harmony Search
|
367
|
+
-------------------------------------
|
304
368
|
|
305
|
-
Let�s
|
306
|
-
define
|
307
|
-
variables that are identified
|
369
|
+
Let's begin by fitting very simple models and use the structure of these
|
370
|
+
models to define our objectives. Then, we can conduct a more extensive
|
371
|
+
search on the variables that are more frequently identified. For
|
372
|
+
instance, in the case below, the complexity is level 3, indicating that
|
373
|
+
we will consider, at most randomly correlated parameters. This approach
|
374
|
+
is useful for initially identifying a suitable set of contributing
|
375
|
+
factors for our search.
|
308
376
|
|
309
377
|
.. code:: ipython3
|
310
378
|
|
@@ -330,10 +398,16 @@ variables that are identified more commonly
|
|
330
398
|
'_max_time': 10000
|
331
399
|
}
|
332
400
|
obj_fun = ObjectiveFunction(X, y, **arguments)
|
333
|
-
|
334
401
|
results = harmony_search(obj_fun)
|
335
402
|
print(results)
|
336
403
|
|
404
|
+
Paper
|
405
|
+
-----
|
406
|
+
|
407
|
+
The following tutorial is in conjunction with our latest paper. A link
|
408
|
+
to the current paper can be found here
|
409
|
+
`MetaCountRegressor <https://www.overleaf.com/read/mszwpwzcxsng#c5eb0c>`__
|
410
|
+
|
337
411
|
Contact
|
338
412
|
-------
|
339
413
|
|
@@ -346,12 +420,12 @@ Citing MetaCountRegressor
|
|
346
420
|
|
347
421
|
Please cite MetaCountRegressor as follows:
|
348
422
|
|
349
|
-
Ahern, Z., Corry P., Paz A. (
|
423
|
+
Ahern, Z., Corry P., Paz A. (2024). MetaCountRegressor [Computer
|
350
424
|
software]. https://pypi.org/project/metacountregressor/
|
351
425
|
|
352
426
|
Or using BibTex as follows:
|
353
427
|
|
354
|
-
\```bibtex @misc{
|
428
|
+
\```bibtex @misc{Ahern2024Meta, author = {Zeke Ahern, Paul Corry and
|
355
429
|
Alexander Paz}, journal = {PyPi}, title = {metacountregressor · PyPI},
|
356
|
-
url = {https://pypi.org/project/metacountregressor/0.1.
|
357
|
-
{
|
430
|
+
url = {https://pypi.org/project/metacountregressor/0.1.80/}, year =
|
431
|
+
{2024}, }
|
@@ -29,6 +29,64 @@ def convert_df_columns_to_binary_and_wide(df):
|
|
29
29
|
|
30
30
|
|
31
31
|
def main(args, **kwargs):
|
32
|
+
'''METACOUNT REGRESSOR TESTING ENVIRONMENT'''
|
33
|
+
import statsmodels.api as sm
|
34
|
+
|
35
|
+
data = sm.datasets.sunspots.load_pandas().data
|
36
|
+
# print(data.exog)
|
37
|
+
data_exog = data['YEAR']
|
38
|
+
data_exog = sm.add_constant(data_exog)
|
39
|
+
data_endog = data['SUNACTIVITY']
|
40
|
+
|
41
|
+
# Instantiate a gamma family model with the default link function.
|
42
|
+
import numpy as np
|
43
|
+
|
44
|
+
gamma_model = sm.NegativeBinomial(data_endog, data_exog)
|
45
|
+
gamma_results = gamma_model.fit()
|
46
|
+
|
47
|
+
print(gamma_results.summary())
|
48
|
+
|
49
|
+
# NOW LET's COMPARE THIS TO METACOUNT REGRESSOR
|
50
|
+
import metacountregressor
|
51
|
+
from importlib.metadata import version
|
52
|
+
print(version('metacountregressor'))
|
53
|
+
import pandas as pd
|
54
|
+
import numpy as np
|
55
|
+
from metacountregressor.solution import ObjectiveFunction
|
56
|
+
from metacountregressor.metaheuristics import (harmony_search,
|
57
|
+
differential_evolution,
|
58
|
+
simulated_annealing)
|
59
|
+
|
60
|
+
# Model Decisions,
|
61
|
+
manual_fit_spec = {
|
62
|
+
|
63
|
+
'fixed_terms': ['const', 'YEAR'],
|
64
|
+
'rdm_terms': [],
|
65
|
+
'rdm_cor_terms': [],
|
66
|
+
'grouped_terms': [],
|
67
|
+
'hetro_in_means': [],
|
68
|
+
'transformations': ['no', 'no'],
|
69
|
+
'dispersion': 1 # Negative Binomial
|
70
|
+
}
|
71
|
+
|
72
|
+
# Arguments
|
73
|
+
arguments = {
|
74
|
+
'algorithm': 'hs',
|
75
|
+
'test_percentage': 0,
|
76
|
+
'test_complexity': 6,
|
77
|
+
'instance_number': 'name',
|
78
|
+
'Manual_Fit': manual_fit_spec
|
79
|
+
}
|
80
|
+
obj_fun = ObjectiveFunction(data_exog, data_endog, **arguments)
|
81
|
+
#exit()
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
|
88
|
+
|
89
|
+
|
32
90
|
print('the args is:', args)
|
33
91
|
print('the kwargs is', kwargs)
|
34
92
|
|
@@ -109,6 +167,16 @@ def main(args, **kwargs):
|
|
109
167
|
'transformations': ['no', 'no', 'no', 'no', 'no', 'no', 'no', 'no'],
|
110
168
|
'dispersion': 1
|
111
169
|
}
|
170
|
+
print('overriding this delete, just want to test the NB')
|
171
|
+
manual_fit_spec = {
|
172
|
+
'fixed_terms': ['const'],
|
173
|
+
'rdm_terms': [],
|
174
|
+
'rdm_cor_terms': [],
|
175
|
+
'grouped_terms': [],
|
176
|
+
'hetro_in_means': [],
|
177
|
+
'transformations': ['no'],
|
178
|
+
'dispersion': 1
|
179
|
+
}
|
112
180
|
|
113
181
|
df = pd.read_csv('./data/Ex-16-3.csv') # read in the data
|
114
182
|
y_df = df[['FREQ']].copy() # only consider crashes
|
@@ -118,7 +186,7 @@ def main(args, **kwargs):
|
|
118
186
|
x_df['Offset'] = np.log(1 + x_df['AADT'] * x_df['LENGTH'] * 365 / 100000000)
|
119
187
|
x_df = x_df.drop(columns=['AADT', 'LENGTH'])
|
120
188
|
|
121
|
-
if args
|
189
|
+
if args.get('seperate_out_factors', 0):
|
122
190
|
|
123
191
|
x_df = helperprocess.as_wide_factor(x_df, keep_original=0,
|
124
192
|
exclude=['INTECHAG', 'CURVES', 'MIMEDSH', 'MXMEDSH', 'SPEED'])
|
@@ -173,8 +241,8 @@ def main(args, **kwargs):
|
|
173
241
|
x_df = x_df.drop(columns = [ 'zonal_ID', 'ln_AADT', 'ln_seg'])
|
174
242
|
x_df['rumble_install_year'] = x_df['rumble_install_year'].astype('category').cat.codes
|
175
243
|
x_df.rename(columns={"rumble_install_year": "has_rumble"}, inplace=True)
|
176
|
-
|
177
|
-
|
244
|
+
except Exception as e:
|
245
|
+
print(e)
|
178
246
|
x_df = df.drop(columns=['Y']) # was dropped postcode
|
179
247
|
|
180
248
|
group_grab = x_df['county']
|
@@ -215,7 +283,6 @@ def main(args, **kwargs):
|
|
215
283
|
else:
|
216
284
|
print('fitting manually')
|
217
285
|
args['Manual_Fit'] = manual_fit_spec
|
218
|
-
|
219
286
|
if args['problem_number'] == str(8) or args['problem_number'] == 8:
|
220
287
|
print('Maine County Dataset.')
|
221
288
|
args['group'] = 'county'
|
@@ -346,10 +413,8 @@ if __name__ == '__main__':
|
|
346
413
|
override = True
|
347
414
|
if override:
|
348
415
|
print('todo turn off, in testing phase')
|
349
|
-
parser.add_argument('-problem_number', default='
|
416
|
+
parser.add_argument('-problem_number', default='4')
|
350
417
|
print('did it make it')
|
351
|
-
|
352
|
-
|
353
418
|
if 'algorithm' not in args:
|
354
419
|
parser.add_argument('-algorithm', type=str, default='hs',
|
355
420
|
help='optimization algorithm')
|
@@ -370,7 +435,7 @@ if __name__ == '__main__':
|
|
370
435
|
parser.print_help()
|
371
436
|
args = vars(parser.parse_args())
|
372
437
|
print(type(args))
|
373
|
-
# TODO add in chi 2 and df in estimation and compare degrees of freedom
|
438
|
+
# TODO add in chi 2 and df in estimation and compare degrees of freedom this needs to be done in solution
|
374
439
|
|
375
440
|
# Print the args.
|
376
441
|
profiler = cProfile.Profile()
|