metacountregressor 0.1.213__tar.gz → 0.1.227__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. metacountregressor-0.1.227/MANIFEST.in +2 -0
  2. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/PKG-INFO +3 -3
  3. metacountregressor-0.1.227/README.md +459 -0
  4. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/metacountregressor.egg-info/PKG-INFO +3 -3
  5. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/metacountregressor.egg-info/SOURCES.txt +3 -0
  6. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/metacountregressor.egg-info/requires.txt +1 -1
  7. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/setup.py +15 -36
  8. metacountregressor-0.1.227/version.txt +1 -0
  9. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/LICENSE.txt +0 -0
  10. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/README.rst +0 -0
  11. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/metacountregressor/__init__.py +0 -0
  12. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/metacountregressor/_device_cust.py +0 -0
  13. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/metacountregressor/app_main.py +0 -0
  14. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/metacountregressor/data_split_helper.py +0 -0
  15. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/metacountregressor/halton.py +0 -0
  16. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/metacountregressor/helperprocess.py +0 -0
  17. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/metacountregressor/main.py +0 -0
  18. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/metacountregressor/main_old.py +0 -0
  19. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/metacountregressor/metaheuristics.py +0 -0
  20. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/metacountregressor/pareto_file.py +0 -0
  21. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/metacountregressor/pareto_logger__plot.py +0 -0
  22. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/metacountregressor/setup.py +0 -0
  23. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/metacountregressor/single_objective_finder.py +0 -0
  24. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/metacountregressor/solution.py +0 -0
  25. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/metacountregressor/test_generated_paper2.py +0 -0
  26. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/metacountregressor.egg-info/dependency_links.txt +0 -0
  27. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/metacountregressor.egg-info/not-zip-safe +0 -0
  28. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/metacountregressor.egg-info/top_level.txt +0 -0
  29. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/setup.cfg +0 -0
  30. {metacountregressor-0.1.213 → metacountregressor-0.1.227}/tests/test.py +0 -0
@@ -0,0 +1,2 @@
1
+ include README.md
2
+ include version.txt
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: metacountregressor
3
- Version: 0.1.213
3
+ Version: 0.1.227
4
4
  Summary: Extensive Testing for Estimation of Data Count Models
5
5
  Home-page: https://github.com/zahern/CountDataEstimation
6
6
  Author: Zeke Ahern
7
7
  Author-email: z.ahern@qut.edu.au
8
8
  License: MIT
9
- Requires-Python: >=3.7
9
+ Requires-Python: >=3.9
10
10
  Description-Content-Type: text/markdown
11
11
  License-File: LICENSE.txt
12
12
  Requires-Dist: numpy>=1.13.1
@@ -14,7 +14,7 @@ Requires-Dist: scipy>=1.0.0
14
14
  Requires-Dist: requests
15
15
  Requires-Dist: latextable
16
16
  Requires-Dist: pandas
17
- Requires-Dist: scikit_learn>=1.4.1.post1
17
+ Requires-Dist: scikit_learn
18
18
  Requires-Dist: statsmodels
19
19
  Requires-Dist: psutil
20
20
  Dynamic: author
@@ -0,0 +1,459 @@
1
+ <div style="display: flex; align-items: center;">
2
+ <img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width: 100px; margin-right: 20px;">
3
+ <p><span style="font-size: 60px;"><strong>MetaCountRegressor</strong></span></p>
4
+ </div>
5
+
6
+ # Tutorial also available as a jupyter notebook
7
+ [Download Example Notebook](https://github.com/zahern/CountDataEstimation/blob/main/Tutorial.ipynb)
8
+
9
+ The tutorial provides more extensive examples on how to run the code and perform experiments. Further documentation is currently in development.
10
+
11
+ # For an Application Setup Download the following GUI
12
+ [Download Application](https://github.com/zahern/MetaCount/tree/master/metacountregressor/application_gui/dist/meta_app)
13
+
14
+ The application involves setting up a problem instance to run the models.
15
+
16
+ ### Entire [Git Repository](https://github.com/zahern/MetaCount.git) is available to clone.
17
+ #### Steps
18
+ 1. Clone Project
19
+ 2. Navigate to "metacountregressor/application_gui/dist/meta_app"
20
+ 3. Run meta_app.exe
21
+ 4. Navigate to metacountregressor/app_main.py
22
+ 5. Run app_main.py
23
+
24
+
25
+ ## Setup For Python Package Approach
26
+ The below code demonstrates how to set up automatic optimization assisted by the harmony search algorithm. References to the Differential Evolution and Simulated Annealing have been mentioned (change accordingly)
27
+
28
+ ## Install: Requires Python 3.10
29
+
30
+ Install `metacountregressor` using pip as follows:
31
+
32
+ ```bash
33
+ pip install metacountregressor
34
+ ```
35
+
36
+ ```python
37
+ import pandas as pd
38
+ import numpy as np
39
+ from metacountregressor.solution import ObjectiveFunction
40
+ from metacountregressor.metaheuristics import (harmony_search,
41
+ differential_evolution,
42
+ simulated_annealing)
43
+
44
+
45
+ ```
46
+
47
+ loaded standard packages
48
+ loaded helper
49
+ testing
50
+
51
+
52
+ #### Basic setup.
53
+ The initial setup involves reading in the data and selecting an optimization algorithm. As the runtime progresses, new solutions will be continually evaluated. Finally, at the end of the runtime, the best solution will be identified and printed out. In the case of multiple objectives all of the best solutions will be printed out that belong to the Pareto frontier.
54
+
55
+
56
+ ```python
57
+ # Read data from CSV file
58
+ df = pd.read_csv(
59
+ "https://raw.githubusercontent.com/zahern/data/main/Ex-16-3.csv")
60
+ X = df
61
+ y = df['FREQ'] # Frequency of crashes
62
+ X['Offset'] = np.log(df['AADT']) # Explicitly define how to offset the data, no offset otherwise
63
+ # Drop Y, selected offset term and ID as there are no panels
64
+ X = df.drop(columns=['FREQ', 'ID', 'AADT'])
65
+
66
+ #some example arguments, these are default so the following line is just for clarity. See the later arguments section for details.
67
+ arguments = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6, 'instance_number':1,
68
+ 'val_percentage':0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "_max_time": 6}
69
+ # Fit the model with metacountregressor
70
+ obj_fun = ObjectiveFunction(X, y, **arguments)
71
+ #replace with other metaheuristics if desired
72
+ results = harmony_search(obj_fun)
73
+
74
+
75
+ ```
76
+
77
+ ## Arguments to feed into the Objective Function:
78
+ ###
79
+ Note: Please Consider the main arguments to change.
80
+
81
+ - `algorithm`: This parameter has multiple choices for the algorithm, such as 'hs', 'sa', and 'de'. Only one choice should be defined as a string value.
82
+ - `test_percentage`: This parameter represents the percentage of data used for in-sample prediction of the model. The value 0.15 corresponds to 15% of the data.
83
+ - `val_percentage`: This parameter represents the percentage of data used to validate the model. The value 0.15 corresponds to 15% of the data.
84
+ - `test_complexity`: This parameter defines the complexity level for testing. The value 6 tests all complexities. Alternatively, you can provide a list of numbers to consider different complexities. The complexities are further explained later in this document.
85
+ - `instance_number`: This parameter is used to give a name to the outputs.
86
+ - `_obj_1`: This parameter has multiple choices for obj_1, such as 'bic', 'aic', and 'hqic'. Only one choice should be defined as a string value.
87
+ - `_obj_2`: This parameter has multiple choices for objective 2, such as 'RMSE_TEST', 'MSE_TEST', and 'MAE_TEST'.
88
+ - `_max_time`: This parameter specifies the maximum number of seconds for the total estimation before stopping.
89
+ - `distribution`: This parameter is a list of distributions to consider. Please select all of the available options and put them into a list of valid options if you want to consider the distribution type for use when modelling with random parameters. The valid options include: 'Normal', 'LnNormal', 'Triangular', and 'Uniform'.
90
+ - `transformations`: This parameter is a list of transformations to consider. Please select all of the available options and put them into a list of valid options if you want to consider the transformation type. The valid options include 'no', 'square-root', 'logarithmic', 'archsinh', and 'as_factor'.
91
+ - `method_ll`: This is a specification of the type of solver available to solve the lower-level maximum likelihood objective (e.g., 'BFGS_2').
92
+
93
+
94
+
95
+ ### Example of changing the arguments:
96
+ Modify the arguments according to your preferences using the commented code as a guide.
97
+
98
+
99
+ ```python
100
+ #Solution Arguments
101
+ arguments = {
102
+ 'algorithm': 'hs', #alternatively input 'de', or 'sa'
103
+ 'is_multi': 1,
104
+ 'test_percentage': 0.2, # used in multi-objective optimisation only. Saves 20% of data for testing.
105
+ 'val_percentage': 0.2, # Saves 20% of data for validation.
106
+ 'test_complexity': 6, # Complexity level for testing (6 tests all) or a list to consider potential differences in complexity
107
+ 'instance_number': 'name', # used for creating a named folder where your models are saved into from the directory
108
+ 'distribution': ['Normal', 'LnNormal', 'Triangular', 'Uniform'],
109
+ 'Model': [0,1], # or equivalently ['POS', 'NB']
110
+ 'transformations': ['no', 'sqrt', 'archsinh'],
111
+ 'method_ll': 'BFGS_2',
112
+ '_max_time': 10
113
+ }
114
+ obj_fun = ObjectiveFunction(X, y, **arguments)
115
+ results = harmony_search(obj_fun)
116
+ ```
117
+
118
+ ## Initial Solution Configurement
119
+ Listed below is an example of how to specify an initial solution within the framework. This initial solution will be used to calculate the fitness and considered in the objective-based search. However, as the search progresses, different hypotheses may be proposed, and alternative modeling components may completely replace the initial solution.
120
+
121
+
122
+ ```python
123
+ #Model Decisions, Specify for initial solution that will be optimised.
124
+ manual_fit_spec = {
125
+ 'fixed_terms': ['SINGLE', 'LENGTH'],
126
+ 'rdm_terms': ['AADT:normal'],
127
+ 'rdm_cor_terms': ['GRADEBR:normal', 'CURVES:normal'],
128
+ 'grouped_terms': [],
129
+ 'hetro_in_means': ['ACCESS:normal', 'MINRAD:normal'],
130
+ 'transformations': ['no', 'no', 'log', 'no', 'no', 'no', 'no'],
131
+ 'dispersion': 0
132
+ }
133
+
134
+
135
+ #Search Arguments
136
+ arguments = {
137
+ 'algorithm': 'hs',
138
+ 'test_percentage': 0.2,
139
+ 'test_complexity': 6,
140
+ 'instance_number': 'name',
141
+ 'Manual_Fit': manual_fit_spec
142
+ }
143
+ obj_fun = ObjectiveFunction(X, y, **arguments)
144
+ ```
145
+
146
+ Setup Complete...
147
+ Benchmaking test with Seed 42
148
+ --------------------------------------------------------------------------------
149
+ Log-Likelihood: -1339.1862434675106
150
+ --------------------------------------------------------------------------------
151
+ bic: 2732.31
152
+ --------------------------------------------------------------------------------
153
+ MSE: 650856.32
154
+ +--------------------------+--------+-------+----------+----------+------------+
155
+ | Effect | $\tau$ | Coeff | Std. Err | z-values | Prob |z|>Z |
156
+ +==========================+========+=======+==========+==========+============+
157
+ | LENGTH | no | -0.15 | 0.01 | -12.98 | 0.00*** |
158
+ +--------------------------+--------+-------+----------+----------+------------+
159
+ | SINGLE | no | -2.46 | 0.04 | -50.00 | 0.00*** |
160
+ +--------------------------+--------+-------+----------+----------+------------+
161
+ | GRADEBR | log | 4.23 | 0.10 | 42.17 | 0.00*** |
162
+ +--------------------------+--------+-------+----------+----------+------------+
163
+ | CURVES | no | 0.51 | 0.01 | 34.78 | 0.00*** |
164
+ +--------------------------+--------+-------+----------+----------+------------+
165
+ | Chol: GRADEBR (Std. | | 2.21 | 0.00 | 50.00 | 0.00*** |
166
+ | Dev. normal) ) | | | | | |
167
+ +--------------------------+--------+-------+----------+----------+------------+
168
+ | Chol: CURVES (Std. Dev. | | -0.51 | 0.00 | -50.00 | 0.00*** |
169
+ | normal) ) | | | | | |
170
+ +--------------------------+--------+-------+----------+----------+------------+
171
+ | Chol: CURVES (Std. Dev. | no | 0.55 | 0.00 | 50.00 | 0.00*** |
172
+ | normal) . GRADEBR (Std. | | | | | |
173
+ | Dev. normal ) | | | | | |
174
+ +--------------------------+--------+-------+----------+----------+------------+
175
+ | main: MINRAD: hetro | no | -0.00 | 0.00 | -44.36 | 0.00*** |
176
+ | group 0 | | | | | |
177
+ +--------------------------+--------+-------+----------+----------+------------+
178
+ | ACCESS: hetro group 0 | | 0.68 | 0.09 | 7.68 | 0.00*** |
179
+ +--------------------------+--------+-------+----------+----------+------------+
180
+ | main: MINRAD: hetro | | -0.00 | 0.00 | -44.86 | 0.00*** |
181
+ | group 0:normal:sd hetro | | | | | |
182
+ | group 0 | | | | | |
183
+ +--------------------------+--------+-------+----------+----------+------------+
184
+
185
+
186
+ Similarly, to return the results, feed the objective function into a metaheuristic solution algorithm. An example of this is provided below:
187
+
188
+
189
+ ```python
190
+ results = harmony_search(obj_fun)
191
+ print(results)
192
+ ```
193
+
194
+ # Notes:
195
+ ### Capabilities of the software include:
196
+ * Handling of Panel Data
197
+ * Support for Data Transformations
198
+ * Implementation of Models with Correlated and Non-Correlated Random Parameters
199
+ * A variety of mixing distributions for parameter estimations, including normal, lognormal, truncated normal, Lindley, Gamma, triangular, and uniform distributions
200
+ * Capability to handle heterogeneity in the means of the random parameters
201
+ * Use of Halton draws for simulated maximum likelihood estimation
202
+ * Support for grouped random parameters with unbalanced groups
203
+ * Post-estimation tools for assessing goodness of fit, making predictions, and conducting out-of-sample validation
204
+ * Multiple parameter optimization routines, such as the BFGS method
205
+ * Comprehensive hypothesis testing using single objectives, such as in-sample BIC and log-likelihood
206
+ * Extensive hypothesis testing using multiple objectives, such as in-sample BIC and out-of-sample MAE (Mean Absolute Error), or in-sample AIC and out-of-sample MSPE (mean-square prediction error)
207
+ * Features that allow analysts to pre-specify variables, interactions, and mixing distributions, among others
208
+ * Meta-heuristic Guided Optimization, including techniques like Simulated Annealing, Harmony Search, and Differential Evolution
209
+ * Customization of Hyper-parameters to solve problems tailored to your dataset
210
+ * Out-of-the-box optimization capability using default metaheuristics
211
+
212
+ ### Interpreting the output of the model:
213
+ A regression table is produced. The following text elements are explained:
214
+ - Std. Dev.: This column appears for effects that are related to random parameters and displays the assumed distributional assumption next to it
215
+ - Chol: This term refers to the Cholesky decomposition element, to show the correlation between two random parameters. The combination of the Cholesky element on itself is equivalent to a normal random parameter.
216
+ - hetro group: This term represents the heterogeneity group number, which refers to all of the contributing factors that share heterogeneity in the means with each other under the same numbered value.
217
+ - $\tau$: This column, displays the type of transformation that was applied to the specific contributing factor in the data.
218
+
219
+
220
+ ## Arguments:
221
+ #### In reference to the arguments that can be fed into the solution algorithm, a dictionary system is utilised with relevant names; these include
222
+
223
+
224
+ The following list describes the arguments available in this function. By default, all of the capabilities described are enabled unless specified otherwise as an argument. For list arguments, include all desired elements in the list to ensure the corresponding options are considered. Example code will be provided later in this guide.
225
+
226
+ 1. **`complexity_level`**: This argument accepts an integer 1-6 or a list of integers between 0 to 5; e.g., a possible configuration might be [0, 2, 3]. Each integer represents a hierarchy level for estimable models associated with each explanatory variable. Here is a summary of the hierarchy:
227
+ - 0: Null model
228
+ - 1: Simple fixed effects model
229
+ - 2: Random parameters model
230
+ - 3: Random correlated parameters model
231
+ - 4: Grouped random parameters model
232
+ - 5: Heterogeneity in the means random parameter model
233
+
234
+ **Note:** For the grouped random parameters model, groupings need to be defined prior to estimation. This can be achieved by including the following key-value pair in the arguments of the `ObjectiveFunction`: `'group': "Enter Column Grouping in data"`. Replace `"Enter Column Grouping in data"` with the actual column grouping in your dataset.
235
+
236
+ Similarly, for panel data, the panel column needs to be defined using the key-value pair: `'panel': "enter column string covering panels"`. Replace `"enter column string covering panels"` with the appropriate column string that represents the panel information in your dataset.
237
+
238
+ 2. **`distributions`**: This argument accepts a list of strings where each string corresponds to a distribution. Valid options include:
239
+ - "Normal"
240
+ - "Lindley"
241
+ - "Uniform"
242
+ - "LogNormal"
243
+ - "Triangular"
244
+ - "Gamma"
245
+ - "TruncatedNormal"
246
+ - Any of the above, concatenated with ":" (e.g., "Normal:grouped"; requires a grouping term defined in the model)
247
+
248
+ 3. **`Model`**: This argument specifies the model form. It can be a list of integers representing different models to test:
249
+ - 0: Poisson
250
+ - 1: Negative-Binomial
251
+ - 2: Generalized-Poisson
252
+
253
+ 4. **`transformations`**: This argument accepts a list of strings representing available transformations within the framework. Valid options include:
254
+ - "no"
255
+ - "square-root"
256
+ - "logarithmic"
257
+ - "archsinh"
258
+ - "as_factor"
259
+
260
+ 5. **`is_multi`**: This argument accepts an integer indicating whether single or multiple objectives are to be tested (0 for single, 1 for multiple).
261
+
262
+ 6. **`test_percentage`**: This argument is used for multi-objective optimization. Define it as a decimal; for example, 0.2 represents 20% of the data for testing.
263
+
264
+ 7. **`val_percentage`**: This argument saves data for validation. Define it as a decimal; for example, 0.2 represents 20% of the data for validation.
265
+
266
+ 8. **`_max_time`**: This argument is used to add a termination time in the algorithm. It takes values as seconds. Note the time is only dependent on the time after the initial population of solutions is generated.
267
+
268
+ ## Example: Assistance by Harmony Search
269
+
270
+
271
+ Let's begin by fitting very simple models and use the structure of these models to define our objectives. Then, we can conduct a more extensive search on the variables that are more frequently identified. For instance, in the case below, the complexity is level 3, indicating that we will consider, at most randomly correlated parameters. This approach is useful for initially identifying a suitable set of contributing factors for our search.
272
+
273
+
274
+
275
+ ```python
276
+
277
+ '''Setup Data'''
278
+ df = pd.read_csv(
279
+ "https://raw.githubusercontent.com/zahern/data/main/Ex-16-3.csv")
280
+ X = df
281
+ y = df['FREQ'] # Frequency of crashes
282
+ X['Offset'] = np.log(df['AADT']) # Explicitley define how to offset the data, no offset otherwise
283
+ # Drop Y, selected offset term and ID as there are no panels
284
+ X = df.drop(columns=['FREQ', 'ID', 'AADT'])
285
+ '''Arguments for Solution'''
286
+ arguments = {
287
+ 'is_multi': 1, #is two objectives considered
288
+ 'test_percentage': 0.2, # used in multi-objective optimisation only. Saves 20% of data for testing.
289
+ 'val_percentage': 0.2, # Saves 20% of data for validation.
290
+ 'test_complexity': 3, # For Very simple Models
291
+ 'obj_1': 'BIC', '_obj_2': 'RMSE_TEST',
292
+ 'instance_number': 'hs_run', # used for creating a named folder where your models are saved into from the directory
293
+ 'distribution': ['Normal'],
294
+ 'Model': [0, 1], # or equivalently ['POS', 'NB']
295
+ 'transformations': ['no', 'sqrt', 'archsinh'],
296
+ '_max_time': 10000
297
+ } '''Arguments for the solution algorithm'''
298
+ argument_hs = {
299
+ '_hms': 20, #harmony memory size,
300
+ '_mpai': 1, #adjustment index
301
+ '_par': 0.3,
302
+ '_hmcr': .5
303
+ }
304
+ obj_fun = ObjectiveFunction(X, y, **arguments)
305
+ results = harmony_search(obj_fun, None, argument_hs)
306
+ print(results)
307
+ ```
308
+
309
+ ## Example: Assistance by Differential Evolution and Simulated Annealing
310
+ Similar to the above example, we only need to change the hyperparameters; the obj_fun can remain the same
311
+
312
+
313
+ ```python
314
+ argument_de = {'_AI': 2,
315
+ '_crossover_perc': .2,
316
+ '_max_iter': 1000,
317
+ '_pop_size': 25
318
+ }
319
+ de_results = differential_evolution(obj_fun, None, **argument_de)
320
+ print(de_results)
321
+
322
+
323
+ args_sa = {'alpha': .99,
324
+ 'STEPS_PER_TEMP': 10,
325
+ 'INTL_ACPT': 0.5,
326
+ '_crossover_perc': .3,
327
+ 'MAX_ITERATIONS': 1000,
328
+ '_num_intl_slns': 25,
329
+ }
330
+
331
+ sa_results = simulated_annealing(obj_fun, None, **args_sa)
332
+ print(sa_results)
333
+ ```
334
+
335
+ ## Comparing to statsmodels
336
+ The following example illustrates how the output compares to well-known packages, including Statsmodels.
337
+
338
+
339
+ ```python
340
+ # Load modules and data
341
+ import statsmodels.api as sm
342
+
343
+ data = sm.datasets.sunspots.load_pandas().data
344
+ #print(data.exog)
345
+ data_exog = data['YEAR']
346
+ data_exog = sm.add_constant(data_exog)
347
+ data_endog = data['SUNACTIVITY']
348
+
349
+ # Instantiate a gamma family model with the default link function.
350
+ import numpy as np
351
+
352
+ gamma_model = sm.NegativeBinomial(data_endog, data_exog)
353
+ gamma_results = gamma_model.fit()
354
+
355
+ print(gamma_results.summary())
356
+
357
+
358
+
359
+
360
+ #NOW LET's COMPARE THIS TO METACOUNTREGRESSOR
361
+
362
+
363
+
364
+
365
+ #Model Decisions,
366
+ manual_fit_spec = {
367
+ 'fixed_terms': ['const','YEAR'],
368
+ 'rdm_terms': [],
369
+ 'rdm_cor_terms': [],
370
+ 'grouped_terms': [],
371
+ 'hetro_in_means': [],
372
+ 'transformations': ['no', 'no'],
373
+ 'dispersion': 1 #Negative Binomial
374
+ }
375
+
376
+
377
+ #Arguments
378
+ arguments = {
379
+ 'algorithm': 'hs',
380
+ 'test_percentage': 0,
381
+ 'test_complexity': 6,
382
+ 'instance_number': 'name',
383
+ 'Manual_Fit': manual_fit_spec
384
+ }
385
+ obj_fun = ObjectiveFunction(data_exog, data_endog, **arguments)
386
+
387
+
388
+
389
+
390
+
391
+
392
+
393
+ ```
394
+
395
+ Optimization terminated successfully.
396
+ Current function value: 4.877748
397
+ Iterations: 22
398
+ Function evaluations: 71
399
+ Gradient evaluations: 70
400
+ NegativeBinomial Regression Results
401
+ ==============================================================================
402
+ Dep. Variable: SUNACTIVITY No. Observations: 309
403
+ Model: NegativeBinomial Df Residuals: 307
404
+ Method: MLE Df Model: 1
405
+ Date: Tue, 13 Aug 2024 Pseudo R-squ.: 0.004087
406
+ Time: 14:13:22 Log-Likelihood: -1507.2
407
+ converged: True LL-Null: -1513.4
408
+ Covariance Type: nonrobust LLR p-value: 0.0004363
409
+ ==============================================================================
410
+ coef std err z P>|z| [0.025 0.975]
411
+ ------------------------------------------------------------------------------
412
+ const 0.2913 1.017 0.287 0.774 -1.701 2.284
413
+ YEAR 0.0019 0.001 3.546 0.000 0.001 0.003
414
+ alpha 0.7339 0.057 12.910 0.000 0.622 0.845
415
+ ==============================================================================
416
+ 0.1.88
417
+ Setup Complete...
418
+ Benchmaking test with Seed 42
419
+ 1
420
+ --------------------------------------------------------------------------------
421
+ Log-Likelihood: -1509.0683662284273
422
+ --------------------------------------------------------------------------------
423
+ bic: 3035.84
424
+ --------------------------------------------------------------------------------
425
+ MSE: 10000000.00
426
+ +--------+--------+-------+----------+----------+------------+
427
+ | Effect | $\tau$ | Coeff | Std. Err | z-values | Prob |z|>Z |
428
+ +========+========+=======+==========+==========+============+
429
+ | const | no | 0.10 | 0.25 | 0.39 | 0.70 |
430
+ +--------+--------+-------+----------+----------+------------+
431
+ | YEAR | no | 0.00 | 0.00 | 20.39 | 0.00*** |
432
+ +--------+--------+-------+----------+----------+------------+
433
+ | nb | | 1.33 | 0.00 | 50.00 | 0.00*** |
434
+ +--------+--------+-------+----------+----------+------------+
435
+
436
+
437
+ ## Paper
438
+
439
+ The following tutorial is in conjunction with our latest paper. A link to the current paper can be found here [MetaCountRegressor](https://www.overleaf.com/read/mszwpwzcxsng#c5eb0c)
440
+
441
+ ## Contact
442
+ If you have any questions, ideas to improve MetaCountRegressor, or want to report a bug, just open a new issue in [GitHub repository](https://github.com/zahern/CountDataEstimation).
443
+
444
+ ## Citing MetaCountRegressor
445
+ Please cite MetaCountRegressor as follows:
446
+
447
+ Ahern, Z., Corry P., Paz A. (2024). MetaCountRegressor [Computer software]. [https://pypi.org/project/metacounregressor/](https://pypi.org/project/metacounregressor/)
448
+
449
+ Or using BibTex as follows:
450
+
451
+ ```bibtex
452
+ @misc{Ahern2024Meta,
453
+ author = {Zeke Ahern and Paul Corry and Alexander Paz},
454
+ journal = {PyPi},
455
+ title = {metacountregressor · PyPI},
456
+ url = {https://pypi.org/project/metacountregressor/0.1.80/},
457
+ year = {2024},
458
+ }
459
+ ```
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: metacountregressor
3
- Version: 0.1.213
3
+ Version: 0.1.227
4
4
  Summary: Extensive Testing for Estimation of Data Count Models
5
5
  Home-page: https://github.com/zahern/CountDataEstimation
6
6
  Author: Zeke Ahern
7
7
  Author-email: z.ahern@qut.edu.au
8
8
  License: MIT
9
- Requires-Python: >=3.7
9
+ Requires-Python: >=3.9
10
10
  Description-Content-Type: text/markdown
11
11
  License-File: LICENSE.txt
12
12
  Requires-Dist: numpy>=1.13.1
@@ -14,7 +14,7 @@ Requires-Dist: scipy>=1.0.0
14
14
  Requires-Dist: requests
15
15
  Requires-Dist: latextable
16
16
  Requires-Dist: pandas
17
- Requires-Dist: scikit_learn>=1.4.1.post1
17
+ Requires-Dist: scikit_learn
18
18
  Requires-Dist: statsmodels
19
19
  Requires-Dist: psutil
20
20
  Dynamic: author
@@ -1,7 +1,10 @@
1
1
  LICENSE.txt
2
+ MANIFEST.in
3
+ README.md
2
4
  README.rst
3
5
  setup.cfg
4
6
  setup.py
7
+ version.txt
5
8
  metacountregressor/__init__.py
6
9
  metacountregressor/_device_cust.py
7
10
  metacountregressor/app_main.py
@@ -3,6 +3,6 @@ scipy>=1.0.0
3
3
  requests
4
4
  latextable
5
5
  pandas
6
- scikit_learn>=1.4.1.post1
6
+ scikit_learn
7
7
  statsmodels
8
8
  psutil
@@ -1,77 +1,56 @@
1
1
  import os
2
+ import setuptools
2
3
 
3
4
  # Get the directory of the current script
4
5
  current_dir = os.path.dirname(os.path.abspath(__file__))
5
6
 
6
7
  # Construct the full path to the README.md file
7
8
  readme_path = os.path.join(current_dir, 'README.md')
8
- #import requests
9
- import setuptools
10
9
 
11
- '''
12
- def get_package_version(package_name):
13
- """
14
- Fetch the latest version of a package from PyPI.
15
- """
16
- url = f"https://pypi.org/pypi/{package_name}/json"
17
- response = requests.get(url)
18
- if response.status_code == 200:
19
- data = response.json()
20
- return data['info']['version']
21
- else:
22
- return None
23
- '''
10
+ # Check if README.md exists
11
+ if not os.path.exists(readme_path):
12
+ raise FileNotFoundError("README.md file is missing. Please ensure it exists in the project root.")
13
+
24
14
  # Read the README.md file for the long description
25
15
  with open(readme_path, 'r', encoding='utf-8') as fh:
26
16
  long_description = fh.read()
27
- '''
28
- package_name = 'metacountregressor'
29
- current_version = get_package_version(package_name)
30
- if current_version:
31
- print(f"The current version of {package_name} is {current_version}")
32
- else:
33
- '''
34
- with open('version.txt', 'r') as f:
35
- # current_version = get_version from pupi
36
- current_version = f.read().strip()
37
-
38
17
 
18
+ # Read the current version from version.txt
19
+ with open('version.txt', 'r') as f:
20
+ current_version = f.read().strip()
39
21
 
40
- # Split the current version into its components
22
+ # Increment the patch version
41
23
  version_parts = current_version.split('.')
42
24
  major, minor, patch = map(int, version_parts)
43
-
44
- # Increment the patch version
45
25
  patch += 1
46
-
47
- # Construct the new version string
48
26
  new_version = f"{major}.{minor}.{patch}"
49
27
 
50
- # Write the new version number back to the file
28
+ # Write the new version back to version.txt
51
29
  with open('version.txt', 'w') as f:
52
30
  f.write(new_version)
53
31
 
32
+ # Setup configuration
54
33
  setuptools.setup(
55
34
  name='metacountregressor',
56
35
  version=new_version,
57
36
  description='Extensive Testing for Estimation of Data Count Models',
58
37
  long_description=long_description,
59
- long_description_content_type='text/markdown', # Specify the content type as Markdown
38
+ long_description_content_type='text/markdown', # Specify Markdown content
60
39
  url='https://github.com/zahern/CountDataEstimation',
61
40
  author='Zeke Ahern',
62
41
  author_email='z.ahern@qut.edu.au',
63
42
  license='MIT',
64
43
  packages=['metacountregressor'],
65
44
  zip_safe=False,
66
- python_requires='>=3.7',
45
+ python_requires='>=3.9',
67
46
  install_requires=[
68
47
  'numpy>=1.13.1',
69
48
  'scipy>=1.0.0',
70
49
  'requests',
71
50
  'latextable',
72
51
  'pandas',
73
- 'scikit_learn>=1.4.1.post1',
52
+ 'scikit_learn',
74
53
  'statsmodels',
75
54
  'psutil'
76
55
  ]
77
- )
56
+ )
@@ -0,0 +1 @@
1
+ 0.1.227