metacountregressor 0.1.91__tar.gz → 0.1.101__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/PKG-INFO +143 -8
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/README.rst +150 -8
- metacountregressor-0.1.101/metacountregressor/app_main.py +253 -0
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor/main.py +80 -48
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor/metaheuristics.py +2 -2
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor/solution.py +65 -13
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor.egg-info/PKG-INFO +143 -8
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor.egg-info/SOURCES.txt +1 -0
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/LICENSE.txt +0 -0
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor/__init__.py +0 -0
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor/_device_cust.py +0 -0
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor/data_split_helper.py +0 -0
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor/halton.py +0 -0
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor/helperprocess.py +0 -0
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor/main_old.py +0 -0
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor/pareto_file.py +0 -0
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor/pareto_logger__plot.py +0 -0
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor/setup.py +0 -0
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor/single_objective_finder.py +0 -0
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor/test_generated_paper2.py +0 -0
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor.egg-info/dependency_links.txt +0 -0
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor.egg-info/not-zip-safe +0 -0
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor.egg-info/requires.txt +0 -0
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor.egg-info/top_level.txt +0 -0
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/setup.cfg +0 -0
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/setup.py +0 -0
- {metacountregressor-0.1.91 → metacountregressor-0.1.101}/tests/test.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: metacountregressor
|
3
|
-
Version: 0.1.91
|
3
|
+
Version: 0.1.101
|
4
4
|
Summary: Extensions for a Python package for estimation of count models.
|
5
5
|
Home-page: https://github.com/zahern/CountDataEstimation
|
6
6
|
Author: Zeke Ahern
|
@@ -274,6 +274,8 @@ Let's begin by fitting very simple models and use the structure of these models
|
|
274
274
|
|
275
275
|
|
276
276
|
```python
|
277
|
+
|
278
|
+
'''Setup Data'''
|
277
279
|
df = pd.read_csv(
|
278
280
|
"https://raw.githubusercontent.com/zahern/data/main/Ex-16-3.csv")
|
279
281
|
X = df
|
@@ -281,25 +283,158 @@ y = df['FREQ'] # Frequency of crashes
|
|
281
283
|
X['Offset'] = np.log(df['AADT']) # Explicitley define how to offset the data, no offset otherwise
|
282
284
|
# Drop Y, selected offset term and ID as there are no panels
|
283
285
|
X = df.drop(columns=['FREQ', 'ID', 'AADT'])
|
284
|
-
|
286
|
+
'''Aguments for Solution'''
|
285
287
|
arguments = {
|
286
|
-
'
|
287
|
-
'is_multi': 1,
|
288
|
+
'is_multi': 1, #is two objectives considered
|
288
289
|
'test_percentage': 0.2, # used in multi-objective optimisation only. Saves 20% of data for testing.
|
289
290
|
'val_percentage:': 0.2, # Saves 20% of data for testing.
|
290
291
|
'test_complexity': 3, # For Very simple Models
|
291
292
|
'obj_1': 'BIC', '_obj_2': 'RMSE_TEST',
|
292
|
-
'instance_number': '
|
293
|
+
'instance_number': 'hs_run', # used for creeating a named folder where your models are saved into from the directory
|
293
294
|
'distribution': ['Normal'],
|
294
|
-
'Model': [0], # or equivalently ['POS', 'NB']
|
295
|
+
'Model': [0, 1], # or equivalently ['POS', 'NB']
|
295
296
|
'transformations': ['no', 'sqrt', 'archsinh'],
|
296
297
|
'_max_time': 10000
|
297
|
-
|
298
|
+
} '''Arguments for the solution algorithm'''
|
299
|
+
argument_hs = {
|
300
|
+
'_hms': 20, #harmony memory size,
|
301
|
+
'_mpai': 1, #adjustement inded
|
302
|
+
'_par': 0.3,
|
303
|
+
'_hmcr': .5
|
304
|
+
}
|
298
305
|
obj_fun = ObjectiveFunction(X, y, **arguments)
|
299
|
-
results = harmony_search(obj_fun)
|
306
|
+
results = harmony_search(obj_fun, None, argument_hs)
|
300
307
|
print(results)
|
301
308
|
```
|
302
309
|
|
310
|
+
## Example: Assistance by Differential Evolution and Simulated Annealing
|
311
|
+
Similar to the above example, we only need to change the hyperparameters; the obj_fun can remain the same.
|
312
|
+
|
313
|
+
|
314
|
+
```python
|
315
|
+
argument_de = {'_AI': 2,
|
316
|
+
'_crossover_perc': .2,
|
317
|
+
'_max_iter': 1000,
|
318
|
+
'_pop_size': 25
|
319
|
+
}
|
320
|
+
de_results = differential_evolution(obj_fun, None, **argument_de)
|
321
|
+
print(de_results)
|
322
|
+
|
323
|
+
|
324
|
+
args_sa = {'alpha': .99,
|
325
|
+
'STEPS_PER_TEMP': 10,
|
326
|
+
'INTL_ACPT': 0.5,
|
327
|
+
'_crossover_perc': .3,
|
328
|
+
'MAX_ITERATIONS': 1000,
|
329
|
+
'_num_intl_slns': 25,
|
330
|
+
}
|
331
|
+
|
332
|
+
sa_results = simulated_annealing(obj_fun, None, **args_sa)
|
333
|
+
print(sa_results)
|
334
|
+
```
|
335
|
+
|
336
|
+
## Comparing to statsmodels
|
337
|
+
The following example illustrates how the output compares to well-known packages, including Statsmodels.
|
338
|
+
|
339
|
+
|
340
|
+
```python
|
341
|
+
# Load modules and data
|
342
|
+
import statsmodels.api as sm
|
343
|
+
|
344
|
+
data = sm.datasets.sunspots.load_pandas().data
|
345
|
+
#print(data.exog)
|
346
|
+
data_exog = data['YEAR']
|
347
|
+
data_exog = sm.add_constant(data_exog)
|
348
|
+
data_endog = data['SUNACTIVITY']
|
349
|
+
|
350
|
+
# Instantiate a gamma family model with the default link function.
|
351
|
+
import numpy as np
|
352
|
+
|
353
|
+
gamma_model = sm.NegativeBinomial(data_endog, data_exog)
|
354
|
+
gamma_results = gamma_model.fit()
|
355
|
+
|
356
|
+
print(gamma_results.summary())
|
357
|
+
|
358
|
+
|
359
|
+
|
360
|
+
|
361
|
+
#NOW LET's COMPARE THIS TO METACOUNTREGRESSOR
|
362
|
+
|
363
|
+
|
364
|
+
|
365
|
+
|
366
|
+
#Model Decisions,
|
367
|
+
manual_fit_spec = {
|
368
|
+
'fixed_terms': ['const','YEAR'],
|
369
|
+
'rdm_terms': [],
|
370
|
+
'rdm_cor_terms': [],
|
371
|
+
'grouped_terms': [],
|
372
|
+
'hetro_in_means': [],
|
373
|
+
'transformations': ['no', 'no'],
|
374
|
+
'dispersion': 1 #Negative Binomial
|
375
|
+
}
|
376
|
+
|
377
|
+
|
378
|
+
#Arguments
|
379
|
+
arguments = {
|
380
|
+
'algorithm': 'hs',
|
381
|
+
'test_percentage': 0,
|
382
|
+
'test_complexity': 6,
|
383
|
+
'instance_number': 'name',
|
384
|
+
'Manual_Fit': manual_fit_spec
|
385
|
+
}
|
386
|
+
obj_fun = ObjectiveFunction(data_exog, data_endog, **arguments)
|
387
|
+
|
388
|
+
|
389
|
+
|
390
|
+
|
391
|
+
|
392
|
+
|
393
|
+
|
394
|
+
```
|
395
|
+
|
396
|
+
Optimization terminated successfully.
|
397
|
+
Current function value: 4.877748
|
398
|
+
Iterations: 22
|
399
|
+
Function evaluations: 71
|
400
|
+
Gradient evaluations: 70
|
401
|
+
NegativeBinomial Regression Results
|
402
|
+
==============================================================================
|
403
|
+
Dep. Variable: SUNACTIVITY No. Observations: 309
|
404
|
+
Model: NegativeBinomial Df Residuals: 307
|
405
|
+
Method: MLE Df Model: 1
|
406
|
+
Date: Tue, 13 Aug 2024 Pseudo R-squ.: 0.004087
|
407
|
+
Time: 14:13:22 Log-Likelihood: -1507.2
|
408
|
+
converged: True LL-Null: -1513.4
|
409
|
+
Covariance Type: nonrobust LLR p-value: 0.0004363
|
410
|
+
==============================================================================
|
411
|
+
coef std err z P>|z| [0.025 0.975]
|
412
|
+
------------------------------------------------------------------------------
|
413
|
+
const 0.2913 1.017 0.287 0.774 -1.701 2.284
|
414
|
+
YEAR 0.0019 0.001 3.546 0.000 0.001 0.003
|
415
|
+
alpha 0.7339 0.057 12.910 0.000 0.622 0.845
|
416
|
+
==============================================================================
|
417
|
+
0.1.88
|
418
|
+
Setup Complete...
|
419
|
+
Benchmaking test with Seed 42
|
420
|
+
1
|
421
|
+
--------------------------------------------------------------------------------
|
422
|
+
Log-Likelihood: -1509.0683662284273
|
423
|
+
--------------------------------------------------------------------------------
|
424
|
+
bic: 3035.84
|
425
|
+
--------------------------------------------------------------------------------
|
426
|
+
MSE: 10000000.00
|
427
|
+
+--------+--------+-------+----------+----------+------------+
|
428
|
+
| Effect | $\tau$ | Coeff | Std. Err | z-values | Prob |z|>Z |
|
429
|
+
+========+========+=======+==========+==========+============+
|
430
|
+
| const | no | 0.10 | 0.25 | 0.39 | 0.70 |
|
431
|
+
+--------+--------+-------+----------+----------+------------+
|
432
|
+
| YEAR | no | 0.00 | 0.00 | 20.39 | 0.00*** |
|
433
|
+
+--------+--------+-------+----------+----------+------------+
|
434
|
+
| nb | | 1.33 | 0.00 | 50.00 | 0.00*** |
|
435
|
+
+--------+--------+-------+----------+----------+------------+
|
436
|
+
|
437
|
+
|
303
438
|
## Paper
|
304
439
|
|
305
440
|
The following tutorial is in conjunction with our latest paper. A link to the current paper can be found here [MetaCountRegressor](https://www.overleaf.com/read/mszwpwzcxsng#c5eb0c)
|
@@ -9,7 +9,7 @@ Tutorial also available as a jupyter notebook
|
|
9
9
|
=============================================
|
10
10
|
|
11
11
|
`Download Example
|
12
|
-
Notebook <https://github.com/zahern/CountDataEstimation/blob/main/
|
12
|
+
Notebook <https://github.com/zahern/CountDataEstimation/blob/main/Tutorial.ipynb>`__
|
13
13
|
|
14
14
|
The tutorial provides more extensive examples on how to run the code and
|
15
15
|
perform experiments. Further documentation is currently in development.
|
@@ -376,6 +376,8 @@ factors for our search.
|
|
376
376
|
|
377
377
|
.. code:: ipython3
|
378
378
|
|
379
|
+
|
380
|
+
'''Setup Data'''
|
379
381
|
df = pd.read_csv(
|
380
382
|
"https://raw.githubusercontent.com/zahern/data/main/Ex-16-3.csv")
|
381
383
|
X = df
|
@@ -383,24 +385,164 @@ factors for our search.
|
|
383
385
|
X['Offset'] = np.log(df['AADT']) # Explicitley define how to offset the data, no offset otherwise
|
384
386
|
# Drop Y, selected offset term and ID as there are no panels
|
385
387
|
X = df.drop(columns=['FREQ', 'ID', 'AADT'])
|
386
|
-
|
388
|
+
'''Aguments for Solution'''
|
387
389
|
arguments = {
|
388
|
-
'
|
389
|
-
'is_multi': 1,
|
390
|
+
'is_multi': 1, #is two objectives considered
|
390
391
|
'test_percentage': 0.2, # used in multi-objective optimisation only. Saves 20% of data for testing.
|
391
392
|
'val_percentage:': 0.2, # Saves 20% of data for testing.
|
392
393
|
'test_complexity': 3, # For Very simple Models
|
393
394
|
'obj_1': 'BIC', '_obj_2': 'RMSE_TEST',
|
394
|
-
'instance_number': '
|
395
|
+
'instance_number': 'hs_run', # used for creeating a named folder where your models are saved into from the directory
|
395
396
|
'distribution': ['Normal'],
|
396
|
-
'Model': [0], # or equivalently ['POS', 'NB']
|
397
|
+
'Model': [0, 1], # or equivalently ['POS', 'NB']
|
397
398
|
'transformations': ['no', 'sqrt', 'archsinh'],
|
398
399
|
'_max_time': 10000
|
399
|
-
|
400
|
+
} '''Arguments for the solution algorithm'''
|
401
|
+
argument_hs = {
|
402
|
+
'_hms': 20, #harmony memory size,
|
403
|
+
'_mpai': 1, #adjustement inded
|
404
|
+
'_par': 0.3,
|
405
|
+
'_hmcr': .5
|
406
|
+
}
|
400
407
|
obj_fun = ObjectiveFunction(X, y, **arguments)
|
401
|
-
results = harmony_search(obj_fun)
|
408
|
+
results = harmony_search(obj_fun, None, argument_hs)
|
402
409
|
print(results)
|
403
410
|
|
411
|
+
Example: Assistance by Differential Evolution and Simulated Annealing
|
412
|
+
-----------------------------------------------------------------------
|
413
|
+
|
414
|
+
Similar to the above example, we only need to change the
|
415
|
+
hyperparameters; the obj_fun can remain the same
|
416
|
+
|
417
|
+
.. code:: ipython3
|
418
|
+
|
419
|
+
argument_de = {'_AI': 2,
|
420
|
+
'_crossover_perc': .2,
|
421
|
+
'_max_iter': 1000,
|
422
|
+
'_pop_size': 25
|
423
|
+
}
|
424
|
+
de_results = differential_evolution(obj_fun, None, **argument_de)
|
425
|
+
print(de_results)
|
426
|
+
|
427
|
+
|
428
|
+
args_sa = {'alpha': .99,
|
429
|
+
'STEPS_PER_TEMP': 10,
|
430
|
+
'INTL_ACPT': 0.5,
|
431
|
+
'_crossover_perc': .3,
|
432
|
+
'MAX_ITERATIONS': 1000,
|
433
|
+
'_num_intl_slns': 25,
|
434
|
+
}
|
435
|
+
|
436
|
+
sa_results = simulated_annealing(obj_fun, None, **args_sa)
|
437
|
+
print(sa_results)
|
438
|
+
|
439
|
+
Comparing to statsmodels
|
440
|
+
------------------------
|
441
|
+
|
442
|
+
The following example illustrates how the output compares to well-known
|
443
|
+
packages, including Statsmodels.
|
444
|
+
|
445
|
+
.. code:: ipython3
|
446
|
+
|
447
|
+
# Load modules and data
|
448
|
+
import statsmodels.api as sm
|
449
|
+
|
450
|
+
data = sm.datasets.sunspots.load_pandas().data
|
451
|
+
#print(data.exog)
|
452
|
+
data_exog = data['YEAR']
|
453
|
+
data_exog = sm.add_constant(data_exog)
|
454
|
+
data_endog = data['SUNACTIVITY']
|
455
|
+
|
456
|
+
# Instantiate a gamma family model with the default link function.
|
457
|
+
import numpy as np
|
458
|
+
|
459
|
+
gamma_model = sm.NegativeBinomial(data_endog, data_exog)
|
460
|
+
gamma_results = gamma_model.fit()
|
461
|
+
|
462
|
+
print(gamma_results.summary())
|
463
|
+
|
464
|
+
|
465
|
+
|
466
|
+
|
467
|
+
#NOW LET's COMPARE THIS TO METACOUNTREGRESSOR
|
468
|
+
|
469
|
+
|
470
|
+
|
471
|
+
|
472
|
+
#Model Decisions,
|
473
|
+
manual_fit_spec = {
|
474
|
+
'fixed_terms': ['const','YEAR'],
|
475
|
+
'rdm_terms': [],
|
476
|
+
'rdm_cor_terms': [],
|
477
|
+
'grouped_terms': [],
|
478
|
+
'hetro_in_means': [],
|
479
|
+
'transformations': ['no', 'no'],
|
480
|
+
'dispersion': 1 #Negative Binomial
|
481
|
+
}
|
482
|
+
|
483
|
+
|
484
|
+
#Arguments
|
485
|
+
arguments = {
|
486
|
+
'algorithm': 'hs',
|
487
|
+
'test_percentage': 0,
|
488
|
+
'test_complexity': 6,
|
489
|
+
'instance_number': 'name',
|
490
|
+
'Manual_Fit': manual_fit_spec
|
491
|
+
}
|
492
|
+
obj_fun = ObjectiveFunction(data_exog, data_endog, **arguments)
|
493
|
+
|
494
|
+
|
495
|
+
|
496
|
+
|
497
|
+
|
498
|
+
|
499
|
+
|
500
|
+
|
501
|
+
|
502
|
+
.. parsed-literal::
|
503
|
+
|
504
|
+
Optimization terminated successfully.
|
505
|
+
Current function value: 4.877748
|
506
|
+
Iterations: 22
|
507
|
+
Function evaluations: 71
|
508
|
+
Gradient evaluations: 70
|
509
|
+
NegativeBinomial Regression Results
|
510
|
+
==============================================================================
|
511
|
+
Dep. Variable: SUNACTIVITY No. Observations: 309
|
512
|
+
Model: NegativeBinomial Df Residuals: 307
|
513
|
+
Method: MLE Df Model: 1
|
514
|
+
Date: Tue, 13 Aug 2024 Pseudo R-squ.: 0.004087
|
515
|
+
Time: 14:13:22 Log-Likelihood: -1507.2
|
516
|
+
converged: True LL-Null: -1513.4
|
517
|
+
Covariance Type: nonrobust LLR p-value: 0.0004363
|
518
|
+
==============================================================================
|
519
|
+
coef std err z P>|z| [0.025 0.975]
|
520
|
+
------------------------------------------------------------------------------
|
521
|
+
const 0.2913 1.017 0.287 0.774 -1.701 2.284
|
522
|
+
YEAR 0.0019 0.001 3.546 0.000 0.001 0.003
|
523
|
+
alpha 0.7339 0.057 12.910 0.000 0.622 0.845
|
524
|
+
==============================================================================
|
525
|
+
0.1.88
|
526
|
+
Setup Complete...
|
527
|
+
Benchmaking test with Seed 42
|
528
|
+
1
|
529
|
+
--------------------------------------------------------------------------------
|
530
|
+
Log-Likelihood: -1509.0683662284273
|
531
|
+
--------------------------------------------------------------------------------
|
532
|
+
bic: 3035.84
|
533
|
+
--------------------------------------------------------------------------------
|
534
|
+
MSE: 10000000.00
|
535
|
+
+--------+--------+-------+----------+----------+------------+
|
536
|
+
| Effect | $\tau$ | Coeff | Std. Err | z-values | Prob |z|>Z |
|
537
|
+
+========+========+=======+==========+==========+============+
|
538
|
+
| const | no | 0.10 | 0.25 | 0.39 | 0.70 |
|
539
|
+
+--------+--------+-------+----------+----------+------------+
|
540
|
+
| YEAR | no | 0.00 | 0.00 | 20.39 | 0.00*** |
|
541
|
+
+--------+--------+-------+----------+----------+------------+
|
542
|
+
| nb | | 1.33 | 0.00 | 50.00 | 0.00*** |
|
543
|
+
+--------+--------+-------+----------+----------+------------+
|
544
|
+
|
545
|
+
|
404
546
|
Paper
|
405
547
|
-----
|
406
548
|
|
@@ -0,0 +1,253 @@
|
|
1
|
+
import warnings
|
2
|
+
import argparse
|
3
|
+
import csv
|
4
|
+
import faulthandler
|
5
|
+
import ast
|
6
|
+
from typing import Any
|
7
|
+
import cProfile
|
8
|
+
import numpy as np
|
9
|
+
import pandas as pd
|
10
|
+
from pandas import DataFrame
|
11
|
+
from pandas.io.parsers import TextFileReader
|
12
|
+
import helperprocess
|
13
|
+
from metaheuristics import (differential_evolution,
|
14
|
+
harmony_search,
|
15
|
+
simulated_annealing)
|
16
|
+
from solution import ObjectiveFunction
|
17
|
+
|
18
|
+
|
19
|
+
warnings.simplefilter("ignore")
|
20
|
+
|
21
|
+
faulthandler.enable()
|
22
|
+
|
23
|
+
|
24
|
+
def convert_df_columns_to_binary_and_wide(df):
|
25
|
+
columns = list(df.columns)
|
26
|
+
|
27
|
+
df = pd.get_dummies(df, columns=columns, drop_first=True)
|
28
|
+
return df
|
29
|
+
|
30
|
+
|
31
|
+
def process_arguments():
|
32
|
+
'''
|
33
|
+
TRYING TO TURN THE CSV FILES INTO RELEVANT ARGS
|
34
|
+
'''
|
35
|
+
try:
|
36
|
+
data_characteristic = pd.read_csv('problem_data.csv')
|
37
|
+
analyst_d = pd.read_csv('decisions.csv')
|
38
|
+
hyper = pd.read_csv('setup_hyper.csv')
|
39
|
+
except Exception as e:
|
40
|
+
print(e)
|
41
|
+
print('Files Have Not Been Set Up Yet..')
|
42
|
+
print('Run the App')
|
43
|
+
exit()
|
44
|
+
|
45
|
+
new_data = {'data': data_characteristic,
|
46
|
+
'analyst':analyst_d,
|
47
|
+
'hyper': hyper}
|
48
|
+
return new_data
|
49
|
+
|
50
|
+
def main(args, **kwargs):
|
51
|
+
'''METACOUNT REGRESSOR TESTING ENVIRONMENT'''
|
52
|
+
|
53
|
+
|
54
|
+
|
55
|
+
|
56
|
+
print('the args is:', args)
|
57
|
+
print('the kwargs is', kwargs)
|
58
|
+
|
59
|
+
# removing junk files if specicified
|
60
|
+
helperprocess.remove_files(args.get('removeFiles', True))
|
61
|
+
|
62
|
+
# do we want to run a test
|
63
|
+
|
64
|
+
|
65
|
+
data_info = process_arguments()
|
66
|
+
data_info['hyper']
|
67
|
+
data_info['analyst']
|
68
|
+
data_info['data']['Y']
|
69
|
+
#data_info['data']['Group'][0]
|
70
|
+
#data_info['data']['Panel'][0]
|
71
|
+
args['decisions'] = data_info['analyst']
|
72
|
+
|
73
|
+
if not np.isnan(data_info['data']['Grouped'][0]):
|
74
|
+
args['group'] = data_info['data']['Grouped'][0]
|
75
|
+
args['ID'] = data_info['data']['Grouped'][0]
|
76
|
+
if not np.isnan(data_info['data']['Panel'][0]):
|
77
|
+
args['panels'] = data_info['data']['Panel'][0]
|
78
|
+
|
79
|
+
df = pd.read_csv(str(data_info['data']['Problem'][0]))
|
80
|
+
x_df = df.drop(columns=[data_info['data']['Y'][0]])
|
81
|
+
y_df = df[[data_info['data']['Y'][0]]]
|
82
|
+
y_df.rename(columns={data_info['data']['Y'][0]: "Y"}, inplace=True)
|
83
|
+
|
84
|
+
manual_fit_spec = None #TODO add in manual fit
|
85
|
+
if args['Keep_Fit'] == str(2) or args['Keep_Fit'] == 2:
|
86
|
+
if manual_fit_spec is None:
|
87
|
+
args['Manual_Fit'] = None
|
88
|
+
else:
|
89
|
+
print('fitting manually')
|
90
|
+
args['Manual_Fit'] = manual_fit_spec
|
91
|
+
if args['problem_number'] == str(8) or args['problem_number'] == 8:
|
92
|
+
print('Maine County Dataset.')
|
93
|
+
args['group'] = 'county'
|
94
|
+
args['panels'] = 'element_ID'
|
95
|
+
args['ID'] = 'element_ID'
|
96
|
+
args['_max_characteristics'] = 55
|
97
|
+
elif args['problem_number'] == str(9) or args['problem_number'] == 9:
|
98
|
+
args['group'] = 'group'
|
99
|
+
args['panels'] = 'ind_id'
|
100
|
+
args['ID'] = 'ind_id'
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
args['complexity_level'] = args.get('complexity_level', 6)
|
105
|
+
|
106
|
+
|
107
|
+
# Initialize AnalystSpecs to None if not manually provided
|
108
|
+
args['AnalystSpecs'] = args.get('AnalystSpecs', None)
|
109
|
+
|
110
|
+
if args['algorithm'] == 'sa':
|
111
|
+
args_hyperparameters = {'alpha': float(args['temp_scale']),
|
112
|
+
'STEPS_PER_TEMP': int(args['steps']),
|
113
|
+
'INTL_ACPT': 0.5,
|
114
|
+
'_crossover_perc': args['crossover'],
|
115
|
+
'MAX_ITERATIONS': int(args['_max_imp']),
|
116
|
+
'_num_intl_slns': 25,
|
117
|
+
'Manual_Fit': args['Manual_Fit'],
|
118
|
+
'MP': int(args['MP'])}
|
119
|
+
helperprocess.entries_to_remove(('crossover', '_max_imp', '_hms', '_hmcr', '_par'), args)
|
120
|
+
print(args)
|
121
|
+
|
122
|
+
obj_fun = ObjectiveFunction(x_df, y_df, **args)
|
123
|
+
|
124
|
+
results = simulated_annealing(obj_fun, None, **args_hyperparameters)
|
125
|
+
|
126
|
+
helperprocess.results_printer(results, args['algorithm'], int(args['is_multi']))
|
127
|
+
|
128
|
+
if args['dual_complexities']:
|
129
|
+
args['complexity_level'] = args['secondary_complexity']
|
130
|
+
obj_fun = ObjectiveFunction(x_df, y_df, **args)
|
131
|
+
results = simulated_annealing(obj_fun, None, **args_hyperparameters)
|
132
|
+
helperprocess.results_printer(results, args['algorithm'], int(args['is_multi']))
|
133
|
+
|
134
|
+
elif args['algorithm'] == 'hs':
|
135
|
+
args['_mpai'] = 1
|
136
|
+
|
137
|
+
obj_fun = ObjectiveFunction(x_df, y_df, **args)
|
138
|
+
args_hyperparameters = {
|
139
|
+
'Manual_Fit': args['Manual_Fit'],
|
140
|
+
'MP': int(args['MP'])
|
141
|
+
}
|
142
|
+
|
143
|
+
results = harmony_search(obj_fun, None, **args_hyperparameters)
|
144
|
+
helperprocess.results_printer(results, args['algorithm'], int(args['is_multi']))
|
145
|
+
|
146
|
+
if args.get('dual_complexities', 0):
|
147
|
+
args['complexity_level'] = args['secondary_complexity']
|
148
|
+
obj_fun = ObjectiveFunction(x_df, y_df, **args)
|
149
|
+
results = harmony_search(obj_fun, None, **args_hyperparameters)
|
150
|
+
helperprocess.results_printer(results, args['algorithm'], int(args['is_multi']))
|
151
|
+
|
152
|
+
|
153
|
+
elif args['algorithm'] == 'de':
|
154
|
+
# force variables
|
155
|
+
args['must_include'] = args.get('force', [])
|
156
|
+
|
157
|
+
args_hyperparameters = {'_AI': args.get('_AI', 2),
|
158
|
+
'_crossover_perc': float(args['crossover']),
|
159
|
+
'_max_iter': int(args['_max_imp'])
|
160
|
+
, '_pop_size': int(args['_hms']), 'instance_number': int(args['line'])
|
161
|
+
, 'Manual_Fit': args['Manual_Fit'],
|
162
|
+
'MP': int(args['MP'])
|
163
|
+
}
|
164
|
+
|
165
|
+
args_hyperparameters = dict(args_hyperparameters)
|
166
|
+
|
167
|
+
helperprocess.entries_to_remove(('crossover', '_max_imp', '_hms', '_hmcr', '_par'), args)
|
168
|
+
obj_fun = ObjectiveFunction(x_df, y_df, **args)
|
169
|
+
|
170
|
+
results = differential_evolution(obj_fun, None, **args_hyperparameters)
|
171
|
+
|
172
|
+
helperprocess.results_printer(results, args['algorithm'], int(args['is_multi']))
|
173
|
+
|
174
|
+
if args['dual_complexities']:
|
175
|
+
args['complexity_level'] = args['secondary_complexity']
|
176
|
+
obj_fun = ObjectiveFunction(x_df, y_df, **args)
|
177
|
+
results = differential_evolution(obj_fun, None, **args_hyperparameters)
|
178
|
+
helperprocess.results_printer(results, args['algorithm'], int(args['is_multi'])) #TODO FIX This
|
179
|
+
|
180
|
+
|
181
|
+
if __name__ == '__main__':
|
182
|
+
"""Loading in command line args. """
|
183
|
+
alg_parser = argparse.ArgumentParser(prog='algorithm', epilog='algorithm specific arguments')
|
184
|
+
alg_parser.add_argument('-AI', default=2, help='adjustment index. For the allowable movement of the algorithm')
|
185
|
+
alg_parser.print_help()
|
186
|
+
parser = argparse.ArgumentParser(prog='main',
|
187
|
+
epilog=main.__doc__,
|
188
|
+
formatter_class=argparse.RawDescriptionHelpFormatter, conflict_handler='resolve')
|
189
|
+
|
190
|
+
parser.add_argument('-line', type=int, default=1,
|
191
|
+
help='line to read in csv to pass in argument')
|
192
|
+
|
193
|
+
if vars(parser.parse_args())['line'] is not None:
|
194
|
+
reader = csv.DictReader(open('set_data.csv', 'r'))
|
195
|
+
args = list()
|
196
|
+
line_number_obs = 0
|
197
|
+
for dictionary in reader: # TODO find a way to handle multiple args
|
198
|
+
args = dictionary
|
199
|
+
if line_number_obs == int(vars(parser.parse_args())['line']):
|
200
|
+
break
|
201
|
+
line_number_obs += 1
|
202
|
+
args = dict(args)
|
203
|
+
|
204
|
+
for key, value in args.items():
|
205
|
+
try:
|
206
|
+
# Attempt to parse the string value to a Python literal if value is a string.
|
207
|
+
if isinstance(value, str):
|
208
|
+
value = ast.literal_eval(value)
|
209
|
+
except (ValueError, SyntaxError):
|
210
|
+
# If there's a parsing error, value remains as the original string.
|
211
|
+
pass
|
212
|
+
|
213
|
+
# Add the argument to the parser with the potentially updated value.
|
214
|
+
parser.add_argument(f'-{key}', default=value)
|
215
|
+
|
216
|
+
for i, action in enumerate(parser._optionals._actions):
|
217
|
+
if "-algorithm" in action.option_strings:
|
218
|
+
parser._optionals._actions[i].help = "optimization algorithm"
|
219
|
+
|
220
|
+
override = True
|
221
|
+
if override:
|
222
|
+
print('todo turn off, in testing phase')
|
223
|
+
parser.add_argument('-problem_number', default='10')
|
224
|
+
print('did it make it')
|
225
|
+
if 'algorithm' not in args:
|
226
|
+
parser.add_argument('-algorithm', type=str, default='hs',
|
227
|
+
help='optimization algorithm')
|
228
|
+
elif 'Manual_Fit' not in args:
|
229
|
+
parser.add_argument('-Manual_Fit', action='store_false', default=None,
|
230
|
+
help='To fit a model manually if desired.')
|
231
|
+
|
232
|
+
parser.add_argument('-seperate_out_factors', action='store_false', default=False,
|
233
|
+
help='Trie of wanting to split data that is potentially categorical as binary'
|
234
|
+
' we want to split the data for processing')
|
235
|
+
parser.add_argument('-supply_csv', type = str, help = 'enter the name of the csv, please include it as a full directorys')
|
236
|
+
|
237
|
+
else: # DIDN"T SPECIFY LINES TRY EACH ONE MANNUALY
|
238
|
+
parser.add_argument('-com', type=str, default='MetaCode',
|
239
|
+
help='line to read csv')
|
240
|
+
|
241
|
+
# Check the args
|
242
|
+
parser.print_help()
|
243
|
+
args = vars(parser.parse_args())
|
244
|
+
print(type(args))
|
245
|
+
# TODO add in chi 2 and df in estimation and compare degrees of freedom this needs to be done in solution
|
246
|
+
|
247
|
+
# Print the args.
|
248
|
+
profiler = cProfile.Profile()
|
249
|
+
profiler.runcall(main,args)
|
250
|
+
profiler.print_stats(sort='time')
|
251
|
+
#TOO MAX_TIME
|
252
|
+
|
253
|
+
|
@@ -28,63 +28,75 @@ def convert_df_columns_to_binary_and_wide(df):
|
|
28
28
|
return df
|
29
29
|
|
30
30
|
|
31
|
+
def process_arguments():
|
32
|
+
'''
|
33
|
+
TRYING TO TURN THE CSV FILES INTO RELEVANT ARGS
|
34
|
+
'''
|
35
|
+
data_characteristic = pd.read_csv('problem_data.csv')
|
36
|
+
analyst_d = pd.read_csv('decisions.csv')
|
37
|
+
hyper = pd.read_csv('setup_hyper.csv')
|
38
|
+
|
39
|
+
new_data = {'data': data_characteristic,
|
40
|
+
'analyst':analyst_d,
|
41
|
+
'hyper': hyper}
|
42
|
+
return new_data
|
43
|
+
|
31
44
|
def main(args, **kwargs):
|
32
45
|
'''METACOUNT REGRESSOR TESTING ENVIRONMENT'''
|
33
|
-
import statsmodels.api as sm
|
34
|
-
|
35
|
-
data = sm.datasets.sunspots.load_pandas().data
|
36
|
-
# print(data.exog)
|
37
|
-
data_exog = data['YEAR']
|
38
|
-
data_exog = sm.add_constant(data_exog)
|
39
|
-
data_endog = data['SUNACTIVITY']
|
40
|
-
|
41
|
-
# Instantiate a gamma family model with the default link function.
|
42
|
-
import numpy as np
|
43
|
-
|
44
|
-
gamma_model = sm.NegativeBinomial(data_endog, data_exog)
|
45
|
-
gamma_results = gamma_model.fit()
|
46
46
|
|
47
|
-
|
47
|
+
'''
|
48
|
+
TESTING_ENV = False
|
49
|
+
if TESTING_ENV:
|
48
50
|
|
49
|
-
|
50
|
-
import metacountregressor
|
51
|
-
from importlib.metadata import version
|
52
|
-
print(version('metacountregressor'))
|
53
|
-
import pandas as pd
|
54
|
-
import numpy as np
|
55
|
-
from metacountregressor.solution import ObjectiveFunction
|
56
|
-
from metacountregressor.metaheuristics import (harmony_search,
|
57
|
-
differential_evolution,
|
58
|
-
simulated_annealing)
|
51
|
+
import statsmodels.api as sm
|
59
52
|
|
60
|
-
|
61
|
-
|
53
|
+
data = sm.datasets.sunspots.load_pandas().data
|
54
|
+
# print(data.exog)
|
55
|
+
data_exog = data['YEAR']
|
56
|
+
data_exog = sm.add_constant(data_exog)
|
57
|
+
data_endog = data['SUNACTIVITY']
|
62
58
|
|
63
|
-
|
64
|
-
|
65
|
-
'rdm_cor_terms': [],
|
66
|
-
'grouped_terms': [],
|
67
|
-
'hetro_in_means': [],
|
68
|
-
'transformations': ['no', 'no'],
|
69
|
-
'dispersion': 1 # Negative Binomial
|
70
|
-
}
|
71
|
-
|
72
|
-
# Arguments
|
73
|
-
arguments = {
|
74
|
-
'algorithm': 'hs',
|
75
|
-
'test_percentage': 0,
|
76
|
-
'test_complexity': 6,
|
77
|
-
'instance_number': 'name',
|
78
|
-
'Manual_Fit': manual_fit_spec
|
79
|
-
}
|
80
|
-
obj_fun = ObjectiveFunction(data_exog, data_endog, **arguments)
|
81
|
-
#exit()
|
59
|
+
# Instantiate a gamma family model with the default link function.
|
60
|
+
import numpy as np
|
82
61
|
|
62
|
+
gamma_model = sm.NegativeBinomial(data_endog, data_exog)
|
63
|
+
gamma_results = gamma_model.fit()
|
83
64
|
|
65
|
+
print(gamma_results.summary())
|
84
66
|
|
67
|
+
# NOW LET's COMPARE THIS TO METACOUNT REGRESSOR
|
68
|
+
import metacountregressor
|
69
|
+
from importlib.metadata import version
|
70
|
+
print(version('metacountregressor'))
|
71
|
+
import pandas as pd
|
72
|
+
import numpy as np
|
73
|
+
from metacountregressor.solution import ObjectiveFunction
|
74
|
+
from metacountregressor.metaheuristics import (harmony_search,
|
75
|
+
differential_evolution,
|
76
|
+
simulated_annealing)
|
85
77
|
|
78
|
+
# Model Decisions,
|
79
|
+
manual_fit_spec = {
|
86
80
|
|
81
|
+
'fixed_terms': ['const', 'YEAR'],
|
82
|
+
'rdm_terms': [],
|
83
|
+
'rdm_cor_terms': [],
|
84
|
+
'grouped_terms': [],
|
85
|
+
'hetro_in_means': [],
|
86
|
+
'transformations': ['no', 'no'],
|
87
|
+
'dispersion': 1 # Negative Binomial
|
88
|
+
}
|
87
89
|
|
90
|
+
# Arguments
|
91
|
+
arguments = {
|
92
|
+
'algorithm': 'hs',
|
93
|
+
'test_percentage': 0,
|
94
|
+
'test_complexity': 6,
|
95
|
+
'instance_number': 'name',
|
96
|
+
'Manual_Fit': manual_fit_spec
|
97
|
+
}
|
98
|
+
obj_fun = ObjectiveFunction(data_exog, data_endog, **arguments)
|
99
|
+
'''
|
88
100
|
|
89
101
|
|
90
102
|
print('the args is:', args)
|
@@ -275,7 +287,25 @@ def main(args, **kwargs):
|
|
275
287
|
|
276
288
|
x_df = helperprocess.interactions(x_df, keep)
|
277
289
|
else: # the dataset has been selected in the program as something else
|
278
|
-
|
290
|
+
data_info = process_arguments()
|
291
|
+
data_info['hyper']
|
292
|
+
data_info['analyst']
|
293
|
+
data_info['data']['Y']
|
294
|
+
#data_info['data']['Group'][0]
|
295
|
+
#data_info['data']['Panel'][0]
|
296
|
+
args['decisions'] = data_info['analyst']
|
297
|
+
|
298
|
+
if not np.isnan(data_info['data']['Grouped'][0]):
|
299
|
+
args['group'] = data_info['data']['Grouped'][0]
|
300
|
+
args['ID'] = data_info['data']['Grouped'][0]
|
301
|
+
if not np.isnan(data_info['data']['Panel'][0]):
|
302
|
+
args['panels'] = data_info['data']['Panel'][0]
|
303
|
+
|
304
|
+
df = pd.read_csv(str(data_info['data']['Problem'][0]))
|
305
|
+
x_df = df.drop(columns=[data_info['data']['Y'][0]])
|
306
|
+
y_df = df[[data_info['data']['Y'][0]]]
|
307
|
+
y_df.rename(columns={data_info['data']['Y'][0]: "Y"}, inplace=True)
|
308
|
+
print('test') #FIXME
|
279
309
|
|
280
310
|
if args['Keep_Fit'] == str(2) or args['Keep_Fit'] == 2:
|
281
311
|
if manual_fit_spec is None:
|
@@ -294,6 +324,8 @@ def main(args, **kwargs):
|
|
294
324
|
args['panels'] = 'ind_id'
|
295
325
|
args['ID'] = 'ind_id'
|
296
326
|
|
327
|
+
|
328
|
+
|
297
329
|
args['complexity_level'] = args.get('complexity_level', 6)
|
298
330
|
|
299
331
|
|
@@ -380,7 +412,7 @@ if __name__ == '__main__':
|
|
380
412
|
epilog=main.__doc__,
|
381
413
|
formatter_class=argparse.RawDescriptionHelpFormatter, conflict_handler='resolve')
|
382
414
|
|
383
|
-
parser.add_argument('-line', type=int, default=
|
415
|
+
parser.add_argument('-line', type=int, default=1,
|
384
416
|
help='line to read in csv to pass in argument')
|
385
417
|
|
386
418
|
if vars(parser.parse_args())['line'] is not None:
|
@@ -413,7 +445,7 @@ if __name__ == '__main__':
|
|
413
445
|
override = True
|
414
446
|
if override:
|
415
447
|
print('todo turn off, in testing phase')
|
416
|
-
parser.add_argument('-problem_number', default='
|
448
|
+
parser.add_argument('-problem_number', default='10')
|
417
449
|
print('did it make it')
|
418
450
|
if 'algorithm' not in args:
|
419
451
|
parser.add_argument('-algorithm', type=str, default='hs',
|
{metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor/metaheuristics.py
RENAMED
@@ -20,8 +20,8 @@ try:
|
|
20
20
|
from .solution import ObjectiveFunction
|
21
21
|
except:
|
22
22
|
print('Exception relative import')
|
23
|
-
from
|
24
|
-
from
|
23
|
+
from pareto_file import Pareto, Solution
|
24
|
+
from solution import ObjectiveFunction
|
25
25
|
|
26
26
|
|
27
27
|
HarmonySearchResults = namedtuple('HarmonySearchResults',
|
@@ -38,8 +38,8 @@ try:
|
|
38
38
|
from .pareto_file import Pareto, Solution
|
39
39
|
from .data_split_helper import DataProcessor
|
40
40
|
except ImportError:
|
41
|
-
from
|
42
|
-
from
|
41
|
+
from _device_cust import device as dev
|
42
|
+
from pareto_file import Pareto, Solution
|
43
43
|
from data_split_helper import DataProcessor
|
44
44
|
|
45
45
|
|
@@ -232,7 +232,7 @@ class ObjectiveFunction(object):
|
|
232
232
|
if self.test_percentage == 0:
|
233
233
|
self.is_multi = False
|
234
234
|
|
235
|
-
if 'panels' in kwargs:
|
235
|
+
if 'panels' in kwargs and not np.isnan(kwargs.get('panels')):
|
236
236
|
self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)
|
237
237
|
|
238
238
|
x_data[kwargs['group']] = x_data[kwargs['group']].astype(
|
@@ -279,7 +279,7 @@ class ObjectiveFunction(object):
|
|
279
279
|
|
280
280
|
exclude_this_test = [4]
|
281
281
|
|
282
|
-
if 'panels' in kwargs:
|
282
|
+
if 'panels' in kwargs and not np.isnan(kwargs.get('panels')):
|
283
283
|
self.panels = np.asarray(df_train[kwargs['panels']])
|
284
284
|
self.panels_test = np.asarray(df_test[kwargs['panels']])
|
285
285
|
self.ids = np.asarray(
|
@@ -411,9 +411,10 @@ class ObjectiveFunction(object):
|
|
411
411
|
|
412
412
|
# self._distribution = ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal', 'lindley']
|
413
413
|
|
414
|
-
self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', '
|
414
|
+
self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal'])
|
415
415
|
|
416
416
|
if self.G is not None:
|
417
|
+
#TODO need to handle this for groups
|
417
418
|
self._distribution = ["trad| " + item for item in self._distribution
|
418
419
|
] + ["grpd| " + item for item in self._distribution]
|
419
420
|
|
@@ -425,10 +426,15 @@ class ObjectiveFunction(object):
|
|
425
426
|
|
426
427
|
self.significant = 0
|
427
428
|
# define the states of our explanatory variables
|
429
|
+
|
430
|
+
|
428
431
|
self._discrete_values = self.define_alphas(self.complexity_level, exclude_this_test,
|
429
|
-
kwargs.get('must_include', []))
|
432
|
+
kwargs.get('must_include', []), extra = kwargs.get('decisions', None))
|
433
|
+
|
434
|
+
|
435
|
+
|
430
436
|
self._discrete_values = self._discrete_values + \
|
431
|
-
|
437
|
+
self.define_distributions_analyst(extra=kwargs.get('decisions', None))
|
432
438
|
|
433
439
|
if 'model_types' in kwargs:
|
434
440
|
model_types = kwargs['model_types']
|
@@ -436,7 +442,7 @@ class ObjectiveFunction(object):
|
|
436
442
|
model_types = [[0, 1]] # add 2 for Generalized Poisson
|
437
443
|
|
438
444
|
self._discrete_values = self._discrete_values + self.define_poissible_transforms(
|
439
|
-
self._transformations) + model_types
|
445
|
+
self._transformations, kwargs.get('decisions',None)) + model_types
|
440
446
|
|
441
447
|
self._model_type_codes = ['p', 'nb',
|
442
448
|
'gp', "pl", ["nb-theta", 'nb-dis']]
|
@@ -787,14 +793,60 @@ class ObjectiveFunction(object):
|
|
787
793
|
par = np.nan_to_num(par)
|
788
794
|
return par
|
789
795
|
|
790
|
-
def
|
796
|
+
def rename_distro(self, distro):
|
797
|
+
# Mapping dictionary
|
798
|
+
mapping = {
|
799
|
+
'Normal': 'normal',
|
800
|
+
'Triangular': 'triangular',
|
801
|
+
'Uniform': 'uniform',
|
802
|
+
'Log-Normal': 'ln_normal',
|
803
|
+
'Trunc-Normal': 'tn_normal'
|
804
|
+
}
|
805
|
+
|
806
|
+
# Use list comprehension with the mapping
|
807
|
+
new_distro = [mapping.get(i, i) for i in distro]
|
808
|
+
return new_distro
|
809
|
+
|
810
|
+
def define_distributions_analyst(self, extra = None):
|
811
|
+
|
812
|
+
if extra is not None:
|
813
|
+
set_alpha = []
|
814
|
+
for col in self._characteristics_names:
|
815
|
+
if col in extra[('Column')].values:
|
816
|
+
matched_index = extra[('Column')].index[extra[('Column')] == col].tolist()
|
817
|
+
distro = ast.literal_eval(extra.iloc[matched_index, 7].values.tolist()[0])
|
818
|
+
distro = self.rename_distro(distro)
|
819
|
+
set_alpha = set_alpha+[distro]
|
820
|
+
return set_alpha
|
821
|
+
return [[x for x in self._distribution]] * self._characteristics
|
822
|
+
|
823
|
+
|
824
|
+
|
825
|
+
|
826
|
+
def define_alphas(self, complexity_level=4, exclude=[], include=[], extra = None):
|
791
827
|
'complexity level'
|
792
828
|
'''
|
793
829
|
2 is feature selection,
|
794
|
-
3 is random
|
795
|
-
4 is correlated random
|
830
|
+
3 is random parameters
|
831
|
+
4 is correlated random parameters
|
832
|
+
|
833
|
+
extra is the stuff defined by the Meta APP
|
796
834
|
'''
|
797
835
|
set_alpha = []
|
836
|
+
if extra is not None:
|
837
|
+
for col in self._characteristics_names:
|
838
|
+
if col == 'const' or col == 'Constant' or col == 'constant': # no random paramaters for const
|
839
|
+
set_alpha = set_alpha + [[1]]
|
840
|
+
elif col == 'Offset':
|
841
|
+
set_alpha = set_alpha + [[1]]
|
842
|
+
|
843
|
+
elif col in extra[('Column')].values:
|
844
|
+
matched_index = extra[('Column')].index[extra[('Column')] == col].tolist()
|
845
|
+
check = list(itertools.chain(*extra.iloc[matched_index, 1:7].values))
|
846
|
+
set_alpha = set_alpha + [[x for x in range(len(check)) if check[x] == True]]
|
847
|
+
return set_alpha
|
848
|
+
|
849
|
+
|
798
850
|
for col in self._characteristics_names:
|
799
851
|
if col == 'const' or col == 'Constant' or col == 'constant': # no random paramaters for const
|
800
852
|
set_alpha = set_alpha + [[1]]
|
@@ -1238,7 +1290,7 @@ class ObjectiveFunction(object):
|
|
1238
1290
|
with open(filename, 'w') as file:
|
1239
1291
|
file.write(content)
|
1240
1292
|
|
1241
|
-
def define_poissible_transforms(self, transforms) -> list:
|
1293
|
+
def define_poissible_transforms(self, transforms, extra= None) -> list:
|
1242
1294
|
transform_set = []
|
1243
1295
|
if not isinstance(self._x_data, pd.DataFrame):
|
1244
1296
|
x_data = self._x_data.reshape(self.N * self.P, -1).copy()
|
@@ -2488,7 +2540,7 @@ class ObjectiveFunction(object):
|
|
2488
2540
|
random.seed(seed)
|
2489
2541
|
|
2490
2542
|
def set_random_seed(self):
|
2491
|
-
print('
|
2543
|
+
print('Imbedding Seed', self._random_seed)
|
2492
2544
|
np.random.seed(self._random_seed)
|
2493
2545
|
|
2494
2546
|
random.seed(self._random_seed)
|
{metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor.egg-info/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: metacountregressor
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.101
|
4
4
|
Summary: Extensions for a Python package for estimation of count models.
|
5
5
|
Home-page: https://github.com/zahern/CountDataEstimation
|
6
6
|
Author: Zeke Ahern
|
@@ -274,6 +274,8 @@ Let's begin by fitting very simple models and use the structure of these models
|
|
274
274
|
|
275
275
|
|
276
276
|
```python
|
277
|
+
|
278
|
+
'''Setup Data'''
|
277
279
|
df = pd.read_csv(
|
278
280
|
"https://raw.githubusercontent.com/zahern/data/main/Ex-16-3.csv")
|
279
281
|
X = df
|
@@ -281,25 +283,158 @@ y = df['FREQ'] # Frequency of crashes
|
|
281
283
|
X['Offset'] = np.log(df['AADT']) # Explicitly define how to offset the data, no offset otherwise
|
282
284
|
# Drop Y, selected offset term and ID as there are no panels
|
283
285
|
X = df.drop(columns=['FREQ', 'ID', 'AADT'])
|
284
|
-
|
286
|
+
'''Arguments for Solution'''
|
285
287
|
arguments = {
|
286
|
-
'
|
287
|
-
'is_multi': 1,
|
288
|
+
'is_multi': 1, # whether two objectives are considered
|
288
289
|
'test_percentage': 0.2, # used in multi-objective optimisation only. Saves 20% of data for testing.
|
289
290
|
'val_percentage:': 0.2, # Saves 20% of data for testing.
|
290
291
|
'test_complexity': 3, # For Very simple Models
|
291
292
|
'obj_1': 'BIC', '_obj_2': 'RMSE_TEST',
|
292
|
-
'instance_number': '
|
293
|
+
'instance_number': 'hs_run', # used for creating a named folder where your models are saved into from the directory
|
293
294
|
'distribution': ['Normal'],
|
294
|
-
'Model': [0], # or equivalently ['POS', 'NB']
|
295
|
+
'Model': [0, 1], # or equivalently ['POS', 'NB']
|
295
296
|
'transformations': ['no', 'sqrt', 'archsinh'],
|
296
297
|
'_max_time': 10000
|
297
|
-
|
298
|
+
} '''Arguments for the solution algorithm'''
|
299
|
+
argument_hs = {
|
300
|
+
'_hms': 20, #harmony memory size,
|
301
|
+
'_mpai': 1, #adjustment index
|
302
|
+
'_par': 0.3,
|
303
|
+
'_hmcr': .5
|
304
|
+
}
|
298
305
|
obj_fun = ObjectiveFunction(X, y, **arguments)
|
299
|
-
results = harmony_search(obj_fun)
|
306
|
+
results = harmony_search(obj_fun, None, argument_hs)
|
300
307
|
print(results)
|
301
308
|
```
|
302
309
|
|
310
|
+
## Example: Assistance by Differential Evolution and Simulated Annealing
|
311
|
+
Similar to the above example, we only need to change the hyperparameters; the obj_fun can remain the same
|
312
|
+
|
313
|
+
|
314
|
+
```python
|
315
|
+
argument_de = {'_AI': 2,
|
316
|
+
'_crossover_perc': .2,
|
317
|
+
'_max_iter': 1000,
|
318
|
+
'_pop_size': 25
|
319
|
+
}
|
320
|
+
de_results = differential_evolution(obj_fun, None, **argument_de)
|
321
|
+
print(de_results)
|
322
|
+
|
323
|
+
|
324
|
+
args_sa = {'alpha': .99,
|
325
|
+
'STEPS_PER_TEMP': 10,
|
326
|
+
'INTL_ACPT': 0.5,
|
327
|
+
'_crossover_perc': .3,
|
328
|
+
'MAX_ITERATIONS': 1000,
|
329
|
+
'_num_intl_slns': 25,
|
330
|
+
}
|
331
|
+
|
332
|
+
sa_results = simulated_annealing(obj_fun, None, **args_sa)
|
333
|
+
print(sa_results)
|
334
|
+
```
|
335
|
+
|
336
|
+
## Comparing to statsmodels
|
337
|
+
The following example illustrates how the output compares to well-known packages, including Statsmodels.
|
338
|
+
|
339
|
+
|
340
|
+
```python
|
341
|
+
# Load modules and data
|
342
|
+
import statsmodels.api as sm
|
343
|
+
|
344
|
+
data = sm.datasets.sunspots.load_pandas().data
|
345
|
+
#print(data.exog)
|
346
|
+
data_exog = data['YEAR']
|
347
|
+
data_exog = sm.add_constant(data_exog)
|
348
|
+
data_endog = data['SUNACTIVITY']
|
349
|
+
|
350
|
+
# Instantiate a negative binomial count model.
|
351
|
+
import numpy as np
|
352
|
+
|
353
|
+
gamma_model = sm.NegativeBinomial(data_endog, data_exog)
|
354
|
+
gamma_results = gamma_model.fit()
|
355
|
+
|
356
|
+
print(gamma_results.summary())
|
357
|
+
|
358
|
+
|
359
|
+
|
360
|
+
|
361
|
+
#NOW LET's COMPARE THIS TO METACOUNTREGRESSOR
|
362
|
+
|
363
|
+
|
364
|
+
|
365
|
+
|
366
|
+
#Model Decisions,
|
367
|
+
manual_fit_spec = {
|
368
|
+
'fixed_terms': ['const','YEAR'],
|
369
|
+
'rdm_terms': [],
|
370
|
+
'rdm_cor_terms': [],
|
371
|
+
'grouped_terms': [],
|
372
|
+
'hetro_in_means': [],
|
373
|
+
'transformations': ['no', 'no'],
|
374
|
+
'dispersion': 1 #Negative Binomial
|
375
|
+
}
|
376
|
+
|
377
|
+
|
378
|
+
#Arguments
|
379
|
+
arguments = {
|
380
|
+
'algorithm': 'hs',
|
381
|
+
'test_percentage': 0,
|
382
|
+
'test_complexity': 6,
|
383
|
+
'instance_number': 'name',
|
384
|
+
'Manual_Fit': manual_fit_spec
|
385
|
+
}
|
386
|
+
obj_fun = ObjectiveFunction(data_exog, data_endog, **arguments)
|
387
|
+
|
388
|
+
|
389
|
+
|
390
|
+
|
391
|
+
|
392
|
+
|
393
|
+
|
394
|
+
```
|
395
|
+
|
396
|
+
Optimization terminated successfully.
|
397
|
+
Current function value: 4.877748
|
398
|
+
Iterations: 22
|
399
|
+
Function evaluations: 71
|
400
|
+
Gradient evaluations: 70
|
401
|
+
NegativeBinomial Regression Results
|
402
|
+
==============================================================================
|
403
|
+
Dep. Variable: SUNACTIVITY No. Observations: 309
|
404
|
+
Model: NegativeBinomial Df Residuals: 307
|
405
|
+
Method: MLE Df Model: 1
|
406
|
+
Date: Tue, 13 Aug 2024 Pseudo R-squ.: 0.004087
|
407
|
+
Time: 14:13:22 Log-Likelihood: -1507.2
|
408
|
+
converged: True LL-Null: -1513.4
|
409
|
+
Covariance Type: nonrobust LLR p-value: 0.0004363
|
410
|
+
==============================================================================
|
411
|
+
coef std err z P>|z| [0.025 0.975]
|
412
|
+
------------------------------------------------------------------------------
|
413
|
+
const 0.2913 1.017 0.287 0.774 -1.701 2.284
|
414
|
+
YEAR 0.0019 0.001 3.546 0.000 0.001 0.003
|
415
|
+
alpha 0.7339 0.057 12.910 0.000 0.622 0.845
|
416
|
+
==============================================================================
|
417
|
+
0.1.88
|
418
|
+
Setup Complete...
|
419
|
+
Benchmaking test with Seed 42
|
420
|
+
1
|
421
|
+
--------------------------------------------------------------------------------
|
422
|
+
Log-Likelihood: -1509.0683662284273
|
423
|
+
--------------------------------------------------------------------------------
|
424
|
+
bic: 3035.84
|
425
|
+
--------------------------------------------------------------------------------
|
426
|
+
MSE: 10000000.00
|
427
|
+
+--------+--------+-------+----------+----------+------------+
|
428
|
+
| Effect | $\tau$ | Coeff | Std. Err | z-values | Prob |z|>Z |
|
429
|
+
+========+========+=======+==========+==========+============+
|
430
|
+
| const | no | 0.10 | 0.25 | 0.39 | 0.70 |
|
431
|
+
+--------+--------+-------+----------+----------+------------+
|
432
|
+
| YEAR | no | 0.00 | 0.00 | 20.39 | 0.00*** |
|
433
|
+
+--------+--------+-------+----------+----------+------------+
|
434
|
+
| nb | | 1.33 | 0.00 | 50.00 | 0.00*** |
|
435
|
+
+--------+--------+-------+----------+----------+------------+
|
436
|
+
|
437
|
+
|
303
438
|
## Paper
|
304
439
|
|
305
440
|
The following tutorial is in conjunction with our latest paper. A link to the current paper can be found here [MetaCountRegressor](https://www.overleaf.com/read/mszwpwzcxsng#c5eb0c)
|
File without changes
|
File without changes
|
File without changes
|
{metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor/data_split_helper.py
RENAMED
File without changes
|
File without changes
|
{metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor/helperprocess.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor/pareto_logger__plot.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor/test_generated_paper2.py
RENAMED
File without changes
|
File without changes
|
{metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor.egg-info/not-zip-safe
RENAMED
File without changes
|
{metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor.egg-info/requires.txt
RENAMED
File without changes
|
{metacountregressor-0.1.91 → metacountregressor-0.1.101}/metacountregressor.egg-info/top_level.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|