metacountregressor 0.1.86__tar.gz → 0.1.96__tar.gz
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/PKG-INFO +143 -8
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/README.rst +150 -8
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor/main.py +58 -0
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor/solution.py +67 -45
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor.egg-info/PKG-INFO +143 -8
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/LICENSE.txt +0 -0
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor/__init__.py +0 -0
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor/_device_cust.py +0 -0
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor/data_split_helper.py +0 -0
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor/halton.py +0 -0
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor/helperprocess.py +0 -0
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor/main_old.py +0 -0
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor/metaheuristics.py +0 -0
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor/pareto_file.py +0 -0
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor/pareto_logger__plot.py +0 -0
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor/setup.py +0 -0
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor/single_objective_finder.py +0 -0
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor/test_generated_paper2.py +0 -0
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor.egg-info/SOURCES.txt +0 -0
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor.egg-info/dependency_links.txt +0 -0
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor.egg-info/not-zip-safe +0 -0
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor.egg-info/requires.txt +0 -0
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor.egg-info/top_level.txt +0 -0
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/setup.cfg +0 -0
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/setup.py +0 -0
- {metacountregressor-0.1.86 → metacountregressor-0.1.96}/tests/test.py +0 -0
{metacountregressor-0.1.86 → metacountregressor-0.1.96}/PKG-INFO
RENAMED

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: metacountregressor
-Version: 0.1.86
+Version: 0.1.96
 Summary: Extensions for a Python package for estimation of count models.
 Home-page: https://github.com/zahern/CountDataEstimation
 Author: Zeke Ahern
@@ -274,6 +274,8 @@ Let's begin by fitting very simple models and use the structure of these models
 
 
 ```python
+
+'''Setup Data'''
 df = pd.read_csv(
 "https://raw.githubusercontent.com/zahern/data/main/Ex-16-3.csv")
 X = df
@@ -281,25 +283,158 @@ y = df['FREQ'] # Frequency of crashes
 X['Offset'] = np.log(df['AADT']) # Explicitley define how to offset the data, no offset otherwise
 # Drop Y, selected offset term and ID as there are no panels
 X = df.drop(columns=['FREQ', 'ID', 'AADT'])
-
+'''Aguments for Solution'''
 arguments = {
-    '
-    'is_multi': 1,
+    'is_multi': 1, #is two objectives considered
     'test_percentage': 0.2, # used in multi-objective optimisation only. Saves 20% of data for testing.
     'val_percentage:': 0.2, # Saves 20% of data for testing.
     'test_complexity': 3, # For Very simple Models
     'obj_1': 'BIC', '_obj_2': 'RMSE_TEST',
-    'instance_number': '
+    'instance_number': 'hs_run', # used for creeating a named folder where your models are saved into from the directory
     'distribution': ['Normal'],
-    'Model': [0], # or equivalently ['POS', 'NB']
+    'Model': [0, 1], # or equivalently ['POS', 'NB']
     'transformations': ['no', 'sqrt', 'archsinh'],
     '_max_time': 10000
-
+} '''Arguments for the solution algorithm'''
+argument_hs = {
+    '_hms': 20, #harmony memory size,
+    '_mpai': 1, #adjustement inded
+    '_par': 0.3,
+    '_hmcr': .5
+}
 obj_fun = ObjectiveFunction(X, y, **arguments)
-results = harmony_search(obj_fun)
+results = harmony_search(obj_fun, None, argument_hs)
 print(results)
 ```
 
+## Example: Assistance by Differential Evololution and Simulated Annealing
+Similiar to the above example we only need to change the hyperparamaters, the obj_fun can remane the same
+
+
+```python
+argument_de = {'_AI': 2,
+               '_crossover_perc': .2,
+               '_max_iter': 1000,
+               '_pop_size': 25
+               }
+de_results = differential_evolution(obj_fun, None, **argument_de)
+print(de_results)
+
+
+args_sa = {'alpha': .99,
+           'STEPS_PER_TEMP': 10,
+           'INTL_ACPT': 0.5,
+           '_crossover_perc': .3,
+           'MAX_ITERATIONS': 1000,
+           '_num_intl_slns': 25,
+           }
+
+sa_results = simulated_annealing(obj_fun, None, **args_sa)
+print(sa_results)
+```
+
+## Comparing to statsmodels
+The following example illustrates how the output compares to well-known packages, including Statsmodels."
+
+
+```python
+# Load modules and data
+import statsmodels.api as sm
+
+data = sm.datasets.sunspots.load_pandas().data
+#print(data.exog)
+data_exog = data['YEAR']
+data_exog = sm.add_constant(data_exog)
+data_endog = data['SUNACTIVITY']
+
+# Instantiate a gamma family model with the default link function.
+import numpy as np
+
+gamma_model = sm.NegativeBinomial(data_endog, data_exog)
+gamma_results = gamma_model.fit()
+
+print(gamma_results.summary())
+
+
+
+
+#NOW LET's COMPARE THIS TO METACOUNTREGRESSOR
+
+
+
+
+#Model Decisions,
+manual_fit_spec = {
+    'fixed_terms': ['const','YEAR'],
+    'rdm_terms': [],
+    'rdm_cor_terms': [],
+    'grouped_terms': [],
+    'hetro_in_means': [],
+    'transformations': ['no', 'no'],
+    'dispersion': 1 #Negative Binomial
+}
+
+
+#Arguments
+arguments = {
+    'algorithm': 'hs',
+    'test_percentage': 0,
+    'test_complexity': 6,
+    'instance_number': 'name',
+    'Manual_Fit': manual_fit_spec
+}
+obj_fun = ObjectiveFunction(data_exog, data_endog, **arguments)
+
+
+
+
+
+
+
+```
+
+    Optimization terminated successfully.
+             Current function value: 4.877748
+             Iterations: 22
+             Function evaluations: 71
+             Gradient evaluations: 70
+                         NegativeBinomial Regression Results
+    ==============================================================================
+    Dep. Variable:            SUNACTIVITY   No. Observations:                  309
+    Model:               NegativeBinomial   Df Residuals:                      307
+    Method:                           MLE   Df Model:                            1
+    Date:                Tue, 13 Aug 2024   Pseudo R-squ.:                0.004087
+    Time:                        14:13:22   Log-Likelihood:                -1507.2
+    converged:                       True   LL-Null:                       -1513.4
+    Covariance Type:            nonrobust   LLR p-value:                 0.0004363
+    ==============================================================================
+                     coef    std err          z      P>|z|      [0.025      0.975]
+    ------------------------------------------------------------------------------
+    const          0.2913      1.017      0.287      0.774      -1.701       2.284
+    YEAR           0.0019      0.001      3.546      0.000       0.001       0.003
+    alpha          0.7339      0.057     12.910      0.000       0.622       0.845
+    ==============================================================================
+    0.1.88
+    Setup Complete...
+    Benchmaking test with Seed 42
+    1
+    --------------------------------------------------------------------------------
+    Log-Likelihood: -1509.0683662284273
+    --------------------------------------------------------------------------------
+    bic: 3035.84
+    --------------------------------------------------------------------------------
+    MSE: 10000000.00
+    +--------+--------+-------+----------+----------+------------+
+    | Effect | $\tau$ | Coeff | Std. Err | z-values | Prob |z|>Z |
+    +========+========+=======+==========+==========+============+
+    | const  | no     | 0.10  | 0.25     | 0.39     | 0.70       |
+    +--------+--------+-------+----------+----------+------------+
+    | YEAR   | no     | 0.00  | 0.00     | 20.39    | 0.00***    |
+    +--------+--------+-------+----------+----------+------------+
+    | nb     |        | 1.33  | 0.00     | 50.00    | 0.00***    |
+    +--------+--------+-------+----------+----------+------------+
+
+
 ## Paper
 
 The following tutorial is in conjunction with our latest paper. A link the current paper can be found here [MetaCountRegressor](https://www.overleaf.com/read/mszwpwzcxsng#c5eb0c)
````
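Read together, the added README lines assemble into the following end-to-end script. This is an editor's sketch rather than code shipped in the release: the imports are inferred from the `main.py` hunk further down this page, the `'val_percentage:'` key is written as `'val_percentage'` on the assumption that the trailing colon inside the string is a typo, and the closing brace of `arguments` is separated from the `'''Arguments for the solution algorithm'''` marker so the snippet actually parses. The comment glosses on the harmony-search keys are guesses from the names, not documented descriptions.

```python
import numpy as np
import pandas as pd
from metacountregressor.solution import ObjectiveFunction
from metacountregressor.metaheuristics import harmony_search

'''Setup Data'''
df = pd.read_csv("https://raw.githubusercontent.com/zahern/data/main/Ex-16-3.csv")
y = df['FREQ']                      # frequency of crashes
df['Offset'] = np.log(df['AADT'])   # explicitly defined offset term
X = df.drop(columns=['FREQ', 'ID', 'AADT'])  # drop y, the offset source and ID (no panels)

'''Arguments for the solution'''
arguments = {
    'is_multi': 1,                  # consider two objectives
    'test_percentage': 0.2,         # multi-objective only: hold out 20% for testing
    'val_percentage': 0.2,          # assumed spelling; the diff has 'val_percentage:'
    'test_complexity': 3,           # very simple models
    'obj_1': 'BIC', '_obj_2': 'RMSE_TEST',
    'instance_number': 'hs_run',    # folder the fitted models are saved into
    'distribution': ['Normal'],
    'Model': [0, 1],                # or equivalently ['POS', 'NB']
    'transformations': ['no', 'sqrt', 'archsinh'],
    '_max_time': 10000,
}

'''Arguments for the solution algorithm'''
argument_hs = {
    '_hms': 20,    # harmony memory size
    '_mpai': 1,    # pitch adjustment index (our gloss of "adjustement inded")
    '_par': 0.3,   # pitch adjustment rate
    '_hmcr': .5,   # harmony memory consideration rate
}

obj_fun = ObjectiveFunction(X, y, **arguments)
results = harmony_search(obj_fun, None, argument_hs)  # new three-argument call in 0.1.96
print(results)
```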
{metacountregressor-0.1.86 → metacountregressor-0.1.96}/README.rst
RENAMED

```diff
@@ -9,7 +9,7 @@ Tutorial also available as a jupyter notebook
 =============================================
 
 `Download Example
-Notebook <https://github.com/zahern/CountDataEstimation/blob/main/
+Notebook <https://github.com/zahern/CountDataEstimation/blob/main/Tutorial.ipynb>`__
 
 The tutorial provides more extensive examples on how to run the code and
 perform experiments. Further documentation is currently in development.
@@ -376,6 +376,8 @@ factors for our search.
 
 .. code:: ipython3
 
+
+    '''Setup Data'''
     df = pd.read_csv(
     "https://raw.githubusercontent.com/zahern/data/main/Ex-16-3.csv")
     X = df
@@ -383,24 +385,164 @@ factors for our search.
     X['Offset'] = np.log(df['AADT']) # Explicitley define how to offset the data, no offset otherwise
     # Drop Y, selected offset term and ID as there are no panels
     X = df.drop(columns=['FREQ', 'ID', 'AADT'])
-
+    '''Aguments for Solution'''
     arguments = {
-        '
-        'is_multi': 1,
+        'is_multi': 1, #is two objectives considered
         'test_percentage': 0.2, # used in multi-objective optimisation only. Saves 20% of data for testing.
         'val_percentage:': 0.2, # Saves 20% of data for testing.
         'test_complexity': 3, # For Very simple Models
         'obj_1': 'BIC', '_obj_2': 'RMSE_TEST',
-        'instance_number': '
+        'instance_number': 'hs_run', # used for creeating a named folder where your models are saved into from the directory
         'distribution': ['Normal'],
-        'Model': [0], # or equivalently ['POS', 'NB']
+        'Model': [0, 1], # or equivalently ['POS', 'NB']
         'transformations': ['no', 'sqrt', 'archsinh'],
         '_max_time': 10000
-
+    } '''Arguments for the solution algorithm'''
+    argument_hs = {
+        '_hms': 20, #harmony memory size,
+        '_mpai': 1, #adjustement inded
+        '_par': 0.3,
+        '_hmcr': .5
+    }
     obj_fun = ObjectiveFunction(X, y, **arguments)
-    results = harmony_search(obj_fun)
+    results = harmony_search(obj_fun, None, argument_hs)
     print(results)
 
+Example: Assistance by Differential Evololution and Simulated Annealing
+-----------------------------------------------------------------------
+
+Similiar to the above example we only need to change the
+hyperparamaters, the obj_fun can remane the same
+
+.. code:: ipython3
+
+    argument_de = {'_AI': 2,
+                   '_crossover_perc': .2,
+                   '_max_iter': 1000,
+                   '_pop_size': 25
+                   }
+    de_results = differential_evolution(obj_fun, None, **argument_de)
+    print(de_results)
+
+
+    args_sa = {'alpha': .99,
+               'STEPS_PER_TEMP': 10,
+               'INTL_ACPT': 0.5,
+               '_crossover_perc': .3,
+               'MAX_ITERATIONS': 1000,
+               '_num_intl_slns': 25,
+               }
+
+    sa_results = simulated_annealing(obj_fun, None, **args_sa)
+    print(sa_results)
+
+Comparing to statsmodels
+------------------------
+
+The following example illustrates how the output compares to well-known
+packages, including Statsmodels."
+
+.. code:: ipython3
+
+    # Load modules and data
+    import statsmodels.api as sm
+
+    data = sm.datasets.sunspots.load_pandas().data
+    #print(data.exog)
+    data_exog = data['YEAR']
+    data_exog = sm.add_constant(data_exog)
+    data_endog = data['SUNACTIVITY']
+
+    # Instantiate a gamma family model with the default link function.
+    import numpy as np
+
+    gamma_model = sm.NegativeBinomial(data_endog, data_exog)
+    gamma_results = gamma_model.fit()
+
+    print(gamma_results.summary())
+
+
+
+
+    #NOW LET's COMPARE THIS TO METACOUNTREGRESSOR
+
+
+
+
+    #Model Decisions,
+    manual_fit_spec = {
+        'fixed_terms': ['const','YEAR'],
+        'rdm_terms': [],
+        'rdm_cor_terms': [],
+        'grouped_terms': [],
+        'hetro_in_means': [],
+        'transformations': ['no', 'no'],
+        'dispersion': 1 #Negative Binomial
+    }
+
+
+    #Arguments
+    arguments = {
+        'algorithm': 'hs',
+        'test_percentage': 0,
+        'test_complexity': 6,
+        'instance_number': 'name',
+        'Manual_Fit': manual_fit_spec
+    }
+    obj_fun = ObjectiveFunction(data_exog, data_endog, **arguments)
+
+
+
+
+
+
+
+
+
+
+.. parsed-literal::
+
+    Optimization terminated successfully.
+             Current function value: 4.877748
+             Iterations: 22
+             Function evaluations: 71
+             Gradient evaluations: 70
+                         NegativeBinomial Regression Results
+    ==============================================================================
+    Dep. Variable:            SUNACTIVITY   No. Observations:                  309
+    Model:               NegativeBinomial   Df Residuals:                      307
+    Method:                           MLE   Df Model:                            1
+    Date:                Tue, 13 Aug 2024   Pseudo R-squ.:                0.004087
+    Time:                        14:13:22   Log-Likelihood:                -1507.2
+    converged:                       True   LL-Null:                       -1513.4
+    Covariance Type:            nonrobust   LLR p-value:                 0.0004363
+    ==============================================================================
+                     coef    std err          z      P>|z|      [0.025      0.975]
+    ------------------------------------------------------------------------------
+    const          0.2913      1.017      0.287      0.774      -1.701       2.284
+    YEAR           0.0019      0.001      3.546      0.000       0.001       0.003
+    alpha          0.7339      0.057     12.910      0.000       0.622       0.845
+    ==============================================================================
+    0.1.88
+    Setup Complete...
+    Benchmaking test with Seed 42
+    1
+    --------------------------------------------------------------------------------
+    Log-Likelihood: -1509.0683662284273
+    --------------------------------------------------------------------------------
+    bic: 3035.84
+    --------------------------------------------------------------------------------
+    MSE: 10000000.00
+    +--------+--------+-------+----------+----------+------------+
+    | Effect | $\tau$ | Coeff | Std. Err | z-values | Prob |z|>Z |
+    +========+========+=======+==========+==========+============+
+    | const  | no     | 0.10  | 0.25     | 0.39     | 0.70       |
+    +--------+--------+-------+----------+----------+------------+
+    | YEAR   | no     | 0.00  | 0.00     | 20.39    | 0.00***    |
+    +--------+--------+-------+----------+----------+------------+
+    | nb     |        | 1.33  | 0.00     | 50.00    | 0.00***    |
+    +--------+--------+-------+----------+----------+------------+
+
+
 Paper
 -----
 
```
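The differential evolution and simulated annealing examples added above reuse the same `obj_fun` (built as in the sketch after the PKG-INFO diff); only the hyperparameter dictionary changes. A consolidated sketch of that pattern, with the key glosses again our reading of the names rather than documented descriptions:

```python
from metacountregressor.metaheuristics import (differential_evolution,
                                               simulated_annealing)

# Differential evolution: population-based search over model specifications.
argument_de = {
    '_AI': 2,                # adjustment index (our gloss)
    '_crossover_perc': .2,   # crossover percentage
    '_max_iter': 1000,
    '_pop_size': 25,
}
de_results = differential_evolution(obj_fun, None, **argument_de)
print(de_results)

# Simulated annealing: single-solution search with a geometric cooling schedule.
args_sa = {
    'alpha': .99,            # temperature decay per step
    'STEPS_PER_TEMP': 10,
    'INTL_ACPT': 0.5,        # initial acceptance rate
    '_crossover_perc': .3,
    'MAX_ITERATIONS': 1000,
    '_num_intl_slns': 25,    # number of initial solutions
}
sa_results = simulated_annealing(obj_fun, None, **args_sa)
print(sa_results)
```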
{metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor/main.py
RENAMED

```diff
@@ -29,6 +29,64 @@ def convert_df_columns_to_binary_and_wide(df):
 
 
 def main(args, **kwargs):
+    '''METACOUNT REGRESSOR TESTING ENVIRONMENT'''
+    import statsmodels.api as sm
+
+    data = sm.datasets.sunspots.load_pandas().data
+    # print(data.exog)
+    data_exog = data['YEAR']
+    data_exog = sm.add_constant(data_exog)
+    data_endog = data['SUNACTIVITY']
+
+    # Instantiate a gamma family model with the default link function.
+    import numpy as np
+
+    gamma_model = sm.NegativeBinomial(data_endog, data_exog)
+    gamma_results = gamma_model.fit()
+
+    print(gamma_results.summary())
+
+    # NOW LET's COMPARE THIS TO METACOUNT REGRESSOR
+    import metacountregressor
+    from importlib.metadata import version
+    print(version('metacountregressor'))
+    import pandas as pd
+    import numpy as np
+    from metacountregressor.solution import ObjectiveFunction
+    from metacountregressor.metaheuristics import (harmony_search,
+                                                   differential_evolution,
+                                                   simulated_annealing)
+
+    # Model Decisions,
+    manual_fit_spec = {
+
+        'fixed_terms': ['const', 'YEAR'],
+        'rdm_terms': [],
+        'rdm_cor_terms': [],
+        'grouped_terms': [],
+        'hetro_in_means': [],
+        'transformations': ['no', 'no'],
+        'dispersion': 1  # Negative Binomial
+    }
+
+    # Arguments
+    arguments = {
+        'algorithm': 'hs',
+        'test_percentage': 0,
+        'test_complexity': 6,
+        'instance_number': 'name',
+        'Manual_Fit': manual_fit_spec
+    }
+    obj_fun = ObjectiveFunction(data_exog, data_endog, **arguments)
+    #exit()
+
+
+
+
+
+
+
+
     print('the args is:', args)
     print('the kwargs is', kwargs)
 
```
{metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor/solution.py
RENAMED

```diff
@@ -122,8 +122,9 @@ class ObjectiveFunction(object):
 
     def __init__(self, x_data, y_data, **kwargs):
 
-        self.reg_penalty =
+        self.reg_penalty = 0
         self.power_up_ll = False
+
         self.bic = None
         self.other_bic = False
         self.test_flag = 1
@@ -389,6 +390,8 @@ class ObjectiveFunction(object):
         self.initial_sig = 1  # pass the test of a single model
         self.pvalue_sig_value = .1
         self.observations = self._x_data.shape[0]
+        self.minimize_scaler = 1/self.observations # scale the minimization function to the observations
+
         self.batch_size = None
         # open the file in the write mode
         self.grab_transforms = 0
```
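The new `minimize_scaler = 1/self.observations` turns the summed negative log-likelihood into a per-observation average before it reaches the optimizer. Dividing the objective and its gradient by a positive constant leaves the minimizer unchanged, but it keeps gradient magnitudes comparable across dataset sizes, so fixed tolerances such as `gtol` behave consistently; the later hunks scale the returned `(objective, gradient)` tuples by this factor and divide `optim_res['fun']` back out to recover the log-likelihood. A self-contained illustration of the same round trip with a Poisson likelihood (our sketch, not package code):

```python
import numpy as np
from scipy.optimize import minimize

rng = np.random.default_rng(42)
X = np.column_stack([np.ones(500), rng.normal(size=500)])
y = rng.poisson(np.exp(X @ np.array([0.5, 0.2])))
n = len(y)  # plays the role of self.observations

def scaled_nll_and_grad(beta):
    mu = np.exp(X @ beta)
    nll = np.sum(mu - y * np.log(mu))  # Poisson NLL up to a constant
    grad = X.T @ (mu - y)
    return nll / n, grad / n           # the 1/N "minimize_scaler"

res = minimize(scaled_nll_and_grad, x0=np.zeros(2), jac=True, method='BFGS')
# Undo the scaling afterwards, mirroring
# loglikelihood = -optim_res['fun']/self.minimize_scaler - penalty:
print(res.x, -res.fun * n)
```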
```diff
@@ -842,6 +845,11 @@ class ObjectiveFunction(object):
         return ([self._model_type_codes[dispersion]])
 
     def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None, obj_1=None, model_nature=None):
+        r'''
+        setup for naming of the model summary
+        '''
+
+
         self.name_deleter = []
         group_rpm = None
         group_dist = []
@@ -1014,7 +1022,7 @@ class ObjectiveFunction(object):
             signif_list = self.pvalue_asterix_add(self.pvalues)
             if model == 1:
 
-                self.coeff_[-1] = np.
+                self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
                 if self.coeff_[-1] < 0.25:
                     print(self.coeff_[-1], 'Warning Check Dispersion')
                     print(np.exp(self.coeff_[-1]))
```
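A note on this hunk: the dispersion coefficient is evidently stored on an unconstrained scale and only transformed for display, and that transform is also why the `nb` row in the quoted metacountregressor summary (1.33) and statsmodels' `alpha` (0.7339) appear to sit on roughly reciprocal scales; the remaining gap is consistent with the slightly different log-likelihoods of the two fits (-1509.1 vs -1507.2). A one-line arithmetic check, ours rather than anything in the diff:

```python
# If the two fits agreed exactly, we would expect the printed nb value
# to be near the reciprocal of statsmodels' alpha:
print(1 / 0.7339)  # ~1.3626, the same scale as the 1.33 in the nb row above
```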
```diff
@@ -2701,9 +2709,7 @@ class ObjectiveFunction(object):
 
 
         """
-
-        if alpha is None:
-            alpha = params[-1]
+
         # Calculate common terms
         '''
         n = len(y)
@@ -2742,7 +2748,9 @@ class ObjectiveFunction(object):
 
         try:
             if alpha is None:
-                alpha = params[-1]
+                alpha = np.exp(params[-1])
+            else:
+                alpha = np.exp(params[-1])
             a1 = 1 / alpha * mu ** Q
             prob = a1 / (a1 + mu)
             exog = X
```
```diff
@@ -3442,24 +3450,44 @@ class ObjectiveFunction(object):
             # if gamma <= 0.01: #min defined value for stable nb
             #    gamma = 0.01
 
+
+
+
             endog = y
             mu = lam
-            alpha = gamma
-            size = 1.0 / alpha * mu ** Q
+            alpha = np.exp(gamma)
+            #size = 1.0 / alpha * mu ** Q
             alpha_size = alpha * mu ** Q
             # prob = size/(size+mu)
             prob = alpha / (alpha + mu)
             # prob = 1/(1+mu*alpha)
+
+            '''test'''
+
+
             try:
                 # print(np.shape(y),np.shape(size), np.shape(prob))
-                gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)
+                #gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)
+                #import time
+                #start_time = time.time()
 
+
+                # Measure time for negbinom_pmf
+                #start_time = time.time()
+                #for _ in range(10000):
+
+                #gg = self.negbinom_pmf(alpha_size, prob, y)
+                #end_time = time.time()
+                #print("Custom functieon time:", end_time - start_time)
+                #start_time = time.time()
+                #for _ in range(10000):
                 gg = np.exp(
                     gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha) + y * np.log(mu) + alpha * np.log(alpha) - (
                         y + alpha) * np.log(mu + alpha))
-
-                #
-                #
+                gg[np.isnan(gg)] = 1
+                #gg = nbinom.pmf(y ,alpha, prob)
+                #end_time = time.time()
+                #print("Custom functieon time:", end_time - start_time)
 
             except Exception as e:
                 print(e)
@@ -3530,7 +3558,7 @@ class ObjectiveFunction(object):
 
         endog = y
         mu = lam
-        alpha = gamma
+        alpha = np.exp(gamma)
         alpha = alpha * mu ** Q
         size = 1 / alpha * mu ** Q # also r
         # self.rate_param = size
```
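For readers checking the algebra in the hunks above: with the dispersion entering as `alpha = np.exp(gamma)` (the `exp` keeps the size parameter positive, which is presumably the point of this release's reparameterisation) and `prob = alpha / (alpha + mu)`, the `gammaln` expression is the standard closed form of the negative binomial PMF in the (size, prob) parameterisation. Note also that the earlier `alpha is None` hunk now applies the same `np.exp` in both branches, so that conditional is redundant but harmless. A small self-contained check against scipy (our sketch, not package code):

```python
import numpy as np
from scipy.special import gammaln
from scipy.stats import nbinom

rng = np.random.default_rng(0)
y = rng.integers(0, 20, size=8).astype(float)  # observed counts
mu = rng.uniform(0.5, 10.0, size=8)            # conditional means
alpha = np.exp(0.3)                            # size parameter, positive by construction

# The closed-form PMF used in the hunk:
gg = np.exp(gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha)
            + y * np.log(mu) + alpha * np.log(alpha)
            - (y + alpha) * np.log(mu + alpha))

# scipy's nbinom uses the same (size, prob) parameterisation:
assert np.allclose(gg, nbinom.pmf(y, alpha, alpha / (alpha + mu)))
```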
```diff
@@ -4428,14 +4456,19 @@ class ObjectiveFunction(object):
                 if return_gradient_n:
                     der, grad_n = self.simple_score_grad(
                         betas, y, eVd, Xd, dispersion, both=True)
-                    return (-loglik + penalty, -der, grad_n)
+                    #return (-loglik + penalty, -der, grad_n)*self.minimize_scaler
+                    scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -der.ravel(), grad_n))
+                    return scaled_tuple
                 else:
                     der = self.simple_score_grad(
                         betas, y, eVd, Xd, dispersion, both=False)
-
-
+                    scaled_tuple = tuple(
+                        x * self.minimize_scaler for x in (-loglik + penalty, -der.ravel()))
+                    return scaled_tuple
+                    #return (-loglik + penalty, -der.ravel())*self.minimize_scaler
             else:
-
+
+                return (-loglik + penalty)*self.minimize_scaler
         # Else, we have draws
         self.n_obs = len(y) * self.Ndraws #todo is this problematic
         penalty += self._penalty_betas(
@@ -4659,34 +4692,18 @@ class ObjectiveFunction(object):
             # lik = np.nan_to_num(lik, )
             loglik = np.log(lik)
             llf_main = loglik
-            if 'exog_infl' in model_nature:
-                params_infl = betas[Kf:Kf + len(model_nature.get('exog_infl'))]
-                params_main = Bf
-                exog_infl = model_nature.get('exog_inflX')
-                llf_main = llf_main.ravel() # TODO test this
-                w = self.predict_logit_part(params_infl, exog_infl)
-
-                w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
-
-                zero_idx = np.nonzero(y == 0)[0]
-                nonzero_idx = np.nonzero(y)[0] # FIXME should shape be unravelled
-
-                llf = np.zeros_like(y, dtype=np.float64).reshape(-1, 1) # TODO test this i added ravel to this code
-                llf[zero_idx] = (np.log(w[zero_idx] + (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
-                llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
-                loglik = llf.sum()
-            else:
 
-
+
+            loglik = loglik.sum()
 
             loglik = np.clip(loglik, log_lik_min, log_lik_max)
             if self.power_up_ll:
                 penalty += self.regularise_l2(betas)
-
+
             penalty += self.regularise_l2(betas)
             if not return_gradient:
 
-                output = (-loglik + penalty,)
+                output = ((-loglik + penalty)*self.minimize_scaler,)
                 if verbose > 1:
                     print(
                         f"Evaluation {self.total_fun_eval} Log-Lik.={-loglik:.2f}")
@@ -4716,19 +4733,24 @@ class ObjectiveFunction(object):
                     # Hinv = np.linalg.inv(H)
                     # except Exception:
                     #    Hinv = np.linalg.pinv(H)
-
+                    scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -grad, grad_n))
+                    return scaled_tuple
+                    #output = (-loglik + penalty, -grad, grad_n)*self.minimize_scaler
 
-                    return output
+                    #return output
                 else:
+                    scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -grad))
+                    return scaled_tuple
+                    #output = (-loglik + penalty, -grad)*self.minimize_scaler
 
-                    output
-
-                    return output
+                    #return output
             except Exception as e:
                 traceback.print_exc()
                 print(e)
 
-
+    def minimize_function(self, loglike):
+        r'Takes the logliklihood function and tranforms it to a more handed minimization function'
+        return loglike/self.n_obs
     def print_chol_mat(self, betas):
         print(self.chol_mat)
         self.get_br_and_bstd(betas)
@@ -5220,7 +5242,7 @@ class ObjectiveFunction(object):
         if self.power_up_ll:
             loglikelihood =-optim_res['fun']/2 - penalty
         else:
-            loglikelihood = -optim_res['fun'] - penalty
+            loglikelihood = -optim_res['fun']/self.minimize_scaler - penalty
 
         # self.coeff_names = coeff_names
         # self.total_iter = optim_res['nit']
@@ -5378,7 +5400,7 @@ class ObjectiveFunction(object):
                 mod),
                 method=method2, tol=1e-5, options={'gtol': tol['gtol']},
                 bounds=bounds)
-
+
 
 
             if method2 == 'L-BFGS-B':
```
{metacountregressor-0.1.86 → metacountregressor-0.1.96}/metacountregressor.egg-info/PKG-INFO
RENAMED

The changes to this file are identical to the PKG-INFO diff shown at the top of this page.
All remaining files (listed with +0 -0 in the summary above) are renamed from 0.1.86 to 0.1.96 without content changes.