econmethods 2.7__tar.gz → 2.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: econmethods
3
- Version: 2.7
3
+ Version: 2.8
4
4
  Summary: A python package implementing various econometrical tests and estimators
5
5
  Home-page: https://github.com/NaturionBG/econmethods
6
6
  Author: NaturionBG
@@ -309,7 +309,6 @@ class FECM:
309
309
  - coint = "x1"
310
310
  - coint = ["x1", "x3", ...]\n
311
311
  Defaults to "x1"
312
- - *include_x_diffs*: Specify whether the model should include the differences of exogenous variables. Defaults to True.
313
312
  - *intercept*: Specify whether the ECM model should have an intercept. Defaults to True.
314
313
  - *stat_vars*: a DataFrame of the same format as "df" - includes variables that will not be differenced and included into the ECM in their raw form. Ensure these variables are I(0). Defaults to None.
315
314
  - *lr_const*: Specify whether the long-run model should have a constant.
@@ -322,7 +321,7 @@ class FECM:
322
321
  If a CCE- method is chosen:
323
322
  - The AR(d) estimation results to forecast the cross-sectional mean | key = "ar"
324
323
  '''
325
- def __init__(self, df: pd.DataFrame, effects: str = 'rand', trend: int = 0, n_lags: int = 1, method: str = 'MG', coint: str | list[str] = 'x1', include_x_diffs: bool = True, intercept: bool = True, stat_vars: pd.DataFrame|None = None, lr_const: bool = False) -> None:
324
+ def __init__(self, df: pd.DataFrame, effects: str = 'rand', trend: int = 0, n_lags: int = 1, method: str = 'MG', coint: str | list[str] = 'x1', intercept: bool = True, stat_vars: pd.DataFrame|None = None, lr_const: bool = False) -> None:
326
325
  self.__df = df
327
326
  self.__eff = effects.lower()
328
327
  self.__t = trend
@@ -332,8 +331,7 @@ class FECM:
332
331
  self.__exog = len(df.columns[3:])
333
332
  self.__l =[]
334
333
  self.__stat_vars = stat_vars
335
- self.__mean_names = ['target_avg']
336
- self.__x_difs = include_x_diffs
334
+ self.__mean_names = ['target_avg_l1']
337
335
  self.__stat = []
338
336
  self.__lr_c = lr_const
339
337
  if stat_vars is not None:
@@ -360,9 +358,10 @@ class FECM:
360
358
  self.__lag = 1
361
359
  self.__verify()
362
360
  self.__means = self.build_means()
363
- if self.__method == 'ccemg' or method == 'ccep':
364
- self.__ar = self.select_ar()
365
361
  self.__lr = self.__estimate_lr()
362
+ self.__df = pd.concat([self.__df, pd.Series(self.__lr.resid, name='error')], axis=1)
363
+ self.__ccemg_units = self.get_ccemg_frames(1)
364
+ self.__mg_units = self.get_mg_frames(1)
366
365
  self.__sr = self.build_sr()
367
366
 
368
367
  def __verify(self) -> None:
@@ -387,15 +386,7 @@ class FECM:
387
386
  mn = mn.set_index('time')
388
387
  means = mn.groupby('time')[['target'] + self.__l].mean()
389
388
  means.columns = self.__mean_names
390
- means = pd.concat([means, means.shift([1])], axis=1)
391
- for var in means.columns[:len(self.__l)+1]:
392
- if 'target' not in var:
393
- if self.__x_difs:
394
- means[f'{var}_diff'] = means[var] - means[f'{var}_1']
395
- means = means.drop(columns=[f'{var}_1'])
396
- else:
397
- means[f'{var}_diff'] = means[var] - means[f'{var}_1']
398
- means = means.drop(columns=[f'{var}_1'])
389
+ means['target_avg_l1'] = means['target_avg_l1'].shift(1)
399
390
  return means
400
391
 
401
392
  def build_GLS(self, w_err: float) -> np.ndarray:
@@ -452,32 +443,15 @@ class FECM:
452
443
  lr_re_matrix = self.build_GLS(resid)
453
444
  return sm.GLS(self.__lr_df.loc[:, 'target'], self.__lr_df.iloc[:, 3:], lr_re_matrix).fit()
454
445
 
455
- def select_ar(self) -> Any:
456
- current_d = self.__lag+1
457
- while current_d >= 1:
458
- frame = pd.DataFrame(self.__means.target_avg)
459
- temp = []
460
- for lag in range(1, current_d+1):
461
- frame.loc[:, f'y_avg{lag}'] = frame['target_avg'].shift(lag)
462
- temp.append(f'y_avg{lag}')
463
- frame = frame.dropna()
464
- part_res = sm.OLS(frame.target_avg, frame[temp]).fit()
465
- if part_res.pvalues[temp[-1]] < 0.05:
466
- break
467
- else:
468
- current_d -=1
469
- print(f'Selected AR lag amount: {current_d}')
470
- return part_res
471
446
 
472
447
  def get_ccemg_frames(self, max_lag: int) -> list[pd.DataFrame]:
473
448
  subdfs = []
474
449
  for unit in self.__df.SpUnit.unique():
475
450
  subdf = self.__df[self.__df.SpUnit == unit].copy(deep=True)
476
- if self.__x_difs:
477
- for var in self.__l:
478
- subdf[f'{var}_lag1'] = subdf[var].shift(1)
479
- subdf[f'{var}_diff'] = subdf[var]- subdf[f'{var}_lag1']
480
- subdf = subdf.drop(columns = [f'{var}_lag1'])
451
+ for var in self.__l:
452
+ subdf[f'{var}_lag1'] = subdf[var].shift(1)
453
+ subdf[f'{var}_diff'] = subdf[var]- subdf[f'{var}_lag1']
454
+ subdf = subdf.drop(columns = [f'{var}_lag1'])
481
455
  subdf['target_lag1'] = subdf['target'].shift(1)
482
456
  subdf.insert(2, 'target_diff', subdf['target'] - subdf['target_lag1'])
483
457
  subdf = subdf.drop(columns = ['target_lag1', *self.__l, 'target'])
@@ -493,11 +467,10 @@ class FECM:
493
467
  subdfs = []
494
468
  for unit in self.__df.SpUnit.unique():
495
469
  subdf = self.__df[self.__df.SpUnit == unit].copy(deep=True)
496
- if self.__x_difs:
497
- for var in self.__l:
498
- subdf[f'{var}_lag1'] = subdf[var].shift(1)
499
- subdf[f'{var}_diff'] = subdf[var]- subdf[f'{var}_lag1']
500
- subdf = subdf.drop(columns = [f'{var}_lag1'])
470
+ for var in self.__l:
471
+ subdf[f'{var}_lag1'] = subdf[var].shift(1)
472
+ subdf[f'{var}_diff'] = subdf[var]- subdf[f'{var}_lag1']
473
+ subdf = subdf.drop(columns = [f'{var}_lag1'])
501
474
  subdf['target_lag1'] = subdf['target'].shift(1)
502
475
  subdf.insert(2, 'target_diff', subdf['target'] - subdf['target_lag1'])
503
476
  subdf = subdf.drop(columns = ['target_lag1', *self.__l, 'target'])
@@ -508,24 +481,27 @@ class FECM:
508
481
  subdfs.append(subdf.dropna())
509
482
  return subdfs
510
483
 
511
- def build_sr(self) -> pd.DataFrame:
512
- self.__df = pd.concat([self.__df, pd.Series(self.__lr.resid, name='error')], axis=1)
484
+ def build_sr(self, drop: list = []) -> pd.DataFrame:
513
485
  est = []
514
486
  if self.__method == 'ccemg':
515
- units = self.get_ccemg_frames(self.__lag)
516
- for model in units:
487
+ units = self.__ccemg_units
488
+ for i, model in enumerate(units):
489
+ model = model.drop(columns=drop)
517
490
  if self.__C:
518
491
  est.append(sm.OLS(model['target_diff'], sm.add_constant(model.iloc[:, 3:])).fit())
519
492
  else:
520
493
  est.append(sm.OLS(model['target_diff'], model.iloc[:, 3:]).fit())
494
+ self.__ccemg_units[i] = self.__ccemg_units[i].drop(columns=drop)
521
495
  return est
522
496
  elif self.__method == 'mg':
523
- units = self.get_mg_frames(self.__lag)
524
- for model in units:
497
+ units = self.__mg_units
498
+ for i, model in enumerate(units):
499
+ model = model.drop(columns=drop)
525
500
  if self.__C:
526
501
  est.append(sm.OLS(model['target_diff'], sm.add_constant(model.iloc[:, 3:])).fit())
527
502
  else:
528
503
  est.append(sm.OLS(model['target_diff'], model.iloc[:, 3:]).fit())
504
+ self.__ccemg_units[i] = self.__ccemg_units[i].drop(columns=drop)
529
505
  return est
530
506
  elif self.__method == 'ccep':
531
507
  units = self.get_ccemg_frames(self.__lag)
@@ -534,37 +510,60 @@ class FECM:
534
510
  est.append(sm.OLS(pool['target_diff'], sm.add_constant(pool.iloc[:, 3:])).fit())
535
511
  else:
536
512
  est.append(sm.OLS(pool['target_diff'], pool.iloc[:, 3:]).fit())
513
+ while True:
514
+ flag = True
515
+ if max(zip(est[0].params.index, est[0].pvalues), key=lambda x: x[1])[1] > 0.06:
516
+ pool = pool.drop(columns=[max(zip(est[0].params.index, est[0].pvalues), key=lambda x: x[1])[0]])
517
+ flag=False
518
+ if flag:
519
+ break
520
+ else:
521
+ if self.__C:
522
+ est[0] = sm.OLS(pool['target_diff'], sm.add_constant(pool.iloc[:, 3:])).fit()
523
+ else:
524
+ est[0] = sm.OLS(pool['target_diff'], pool.iloc[:, 3:]).fit()
537
525
  return est
538
526
 
527
+ def mg_algorithm(self) -> pd.DataFrame:
528
+ coefs = []
529
+ rsq = []
530
+ for result in self.__sr:
531
+ coefs.append(result.params)
532
+ rsq.append(result.rsquared)
533
+ coef_mean = pd.concat(coefs, axis=1).mean(axis=1)
534
+ coef_mean.name = 'Mean Coefs'
535
+ coef_std = pd.concat(coefs, axis=1).std(axis=1)
536
+ coef_mse = coef_std/np.sqrt(self.__N)
537
+ t_means = coef_mean / coef_mse
538
+ mg_W = sc.chi2(self.__exog).sf(np.sum(coef_mean**2 / coef_mse**2))
539
+ tpvalues_mean = t_means.apply(lambda x: 2*min(sc.t(self.__N-1).cdf(x), sc.t(self.__N-1).sf(x)))
540
+ tpvalues_mean.name = 'T-pvalues'
541
+ rsq_mean = np.array(rsq).mean()
542
+ res = {
543
+ 'Rsquared': rsq_mean,
544
+ 'W_Pvalue': mg_W,
545
+ 'coefs': pd.concat([coef_mean, tpvalues_mean], axis=1)
546
+ }
547
+ return res
548
+
539
549
  def fit(self) -> dict:
540
550
  dct = dict()
541
551
  if self.__method == 'ccep':
542
552
  dct['sr_res'] = self.__sr[0]
543
553
  dct['lr_res'] = self.__lr
544
- dct['ar'] = self.__ar
545
554
  elif self.__method == 'ccemg' or self.__method == 'mg':
546
555
  dct['lr_res'] = self.__lr
547
- if self.__method == 'ccemg':
548
- dct['ar'] = self.__ar
549
- coefs = []
550
- rsq = []
551
- for result in self.__sr:
552
- coefs.append(result.params)
553
- rsq.append(result.rsquared)
554
- coef_mean = pd.concat(coefs, axis=1).mean(axis=1)
555
- coef_mean.name = 'Mean Coefs'
556
- coef_std = pd.concat(coefs, axis=1).std(axis=1)
557
- coef_mse = coef_std/np.sqrt(self.__N)
558
- t_means = coef_mean / coef_mse
559
- mg_W = sc.chi2(self.__exog).sf(np.sum(coef_mean**2 / coef_mse**2))
560
- tpvalues_mean = t_means.apply(lambda x: 2*min(sc.t(self.__N-1).cdf(x), sc.t(self.__N-1).sf(x)))
561
- tpvalues_mean.name = 'T-pvalues'
562
- rsq_mean = np.array(rsq).mean()
563
- res = {
564
- 'Rsquared': rsq_mean,
565
- 'W_Pvalue': mg_W,
566
- 'coefs': pd.concat([coef_mean, tpvalues_mean], axis=1)
567
- }
556
+ res = self.mg_algorithm()
557
+ while True:
558
+ flag = True
559
+ if max(zip(res['coefs'].index, res['coefs']['T-pvalues']), key=lambda x: x[1])[1] > 0.06:
560
+ self.__sr = self.build_sr([max(zip(res['coefs'].index, res['coefs']['T-pvalues']), key=lambda x: x[1])[0]])
561
+ flag=False
562
+ if flag:
563
+ break
564
+ else:
565
+ res = self.mg_algorithm()
566
+
568
567
  dct['sr_res'] = res
569
568
  return dct
570
569
 
@@ -572,6 +571,8 @@ class FECM:
572
571
  pass
573
572
 
574
573
 
574
+
575
+
575
576
  class CDTwoWay:
576
577
  '''
577
578
  Implementation of the CD test to validate/reject cross-sectional dependence.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: econmethods
3
- Version: 2.7
3
+ Version: 2.8
4
4
  Summary: A python package implementing various econometrical tests and estimators
5
5
  Home-page: https://github.com/NaturionBG/econmethods
6
6
  Author: NaturionBG
@@ -5,7 +5,7 @@ with open('README.md', 'r') as f:
5
5
 
6
6
  setup(
7
7
  name = 'econmethods',
8
- version = 'v2.7',
8
+ version = 'v2.8',
9
9
  description='A python package implementing various econometrical tests and estimators',
10
10
  packages = find_packages(),
11
11
  long_description=long_desc,
File without changes
File without changes