my-markdown-library 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/F24LS_md/ Lecture 4 - Public.md +347 -0
- data/F24LS_md/Lecture 1 - Introduction and Overview.md +327 -0
- data/F24LS_md/Lecture 10 - Development_.md +631 -0
- data/F24LS_md/Lecture 11 - Econometrics.md +345 -0
- data/F24LS_md/Lecture 12 - Finance.md +692 -0
- data/F24LS_md/Lecture 13 - Environmental Economics.md +299 -0
- data/F24LS_md/Lecture 15 - Conclusion.md +272 -0
- data/F24LS_md/Lecture 2 - Demand.md +349 -0
- data/F24LS_md/Lecture 3 - Supply.md +329 -0
- data/F24LS_md/Lecture 5 - Production C-D.md +291 -0
- data/F24LS_md/Lecture 6 - Utility and Latex.md +440 -0
- data/F24LS_md/Lecture 7 - Inequality.md +607 -0
- data/F24LS_md/Lecture 8 - Macroeconomics.md +704 -0
- data/F24LS_md/Lecture 8 - Macro.md +700 -0
- data/F24LS_md/Lecture 9 - Game Theory_.md +436 -0
- data/F24LS_md/summary.yaml +105 -0
- data/F24Lec_MD/LecNB_summary.yaml +206 -0
- data/F24Lec_MD/lec01/lec01.md +267 -0
- data/F24Lec_MD/lec02/Avocados_demand.md +425 -0
- data/F24Lec_MD/lec02/Demand_Steps_24.md +126 -0
- data/F24Lec_MD/lec02/PriceElasticity.md +83 -0
- data/F24Lec_MD/lec02/ScannerData_Beer.md +171 -0
- data/F24Lec_MD/lec02/demand-curve-Fa24.md +213 -0
- data/F24Lec_MD/lec03/3.0-CubicCostCurve.md +239 -0
- data/F24Lec_MD/lec03/3.1-Supply.md +274 -0
- data/F24Lec_MD/lec03/3.2-sympy.md +332 -0
- data/F24Lec_MD/lec03/3.3a-california-energy.md +120 -0
- data/F24Lec_MD/lec03/3.3b-a-really-hot-tuesday.md +121 -0
- data/F24Lec_MD/lec04/lec04-CSfromSurvey-closed.md +335 -0
- data/F24Lec_MD/lec04/lec04-CSfromSurvey.md +331 -0
- data/F24Lec_MD/lec04/lec04-Supply-Demand-closed.md +519 -0
- data/F24Lec_MD/lec04/lec04-Supply-Demand.md +514 -0
- data/F24Lec_MD/lec04/lec04-four-plot-24.md +34 -0
- data/F24Lec_MD/lec04/lec04-four-plot.md +34 -0
- data/F24Lec_MD/lec05/Lec5-Cobb-Douglas.md +131 -0
- data/F24Lec_MD/lec05/Lec5-CobbD-AER1928.md +283 -0
- data/F24Lec_MD/lec06/6.1-Sympy-Differentiation.md +253 -0
- data/F24Lec_MD/lec06/6.2-3D-utility.md +287 -0
- data/F24Lec_MD/lec06/6.3-QuantEcon-Optimization.md +399 -0
- data/F24Lec_MD/lec06/6.4-latex.md +138 -0
- data/F24Lec_MD/lec06/6.5-Edgeworth.md +269 -0
- data/F24Lec_MD/lec07/7.1-inequality.md +283 -0
- data/F24Lec_MD/lec07/7.2-historical-inequality.md +237 -0
- data/F24Lec_MD/lec08/macro-fred-api.md +313 -0
- data/F24Lec_MD/lec09/lecNB-prisoners-dilemma.md +88 -0
- data/F24Lec_MD/lec10/Lec10.2-waterguard.md +401 -0
- data/F24Lec_MD/lec10/lec10.1-mapping.md +199 -0
- data/F24Lec_MD/lec11/11.1-slr.md +305 -0
- data/F24Lec_MD/lec11/11.2-mlr.md +171 -0
- data/F24Lec_MD/lec12/Lec12-4-PersonalFinance.md +590 -0
- data/F24Lec_MD/lec12/lec12-1_Interest_Payments.md +267 -0
- data/F24Lec_MD/lec12/lec12-2-stocks-options.md +235 -0
- data/F24Lec_MD/lec13/Co2_ClimateChange.md +139 -0
- data/F24Lec_MD/lec13/ConstructingMAC.md +213 -0
- data/F24Lec_MD/lec13/EmissionsTracker.md +170 -0
- data/F24Lec_MD/lec13/KuznetsHypothesis.md +219 -0
- data/F24Lec_MD/lec13/RoslingPlots.md +217 -0
- data/F24Lec_MD/lec15/vibecession.md +485 -0
- data/F24Textbook_MD/00-intro/index.md +292 -0
- data/F24Textbook_MD/01-demand/01-demand.md +152 -0
- data/F24Textbook_MD/01-demand/02-example.md +131 -0
- data/F24Textbook_MD/01-demand/03-log-log.md +284 -0
- data/F24Textbook_MD/01-demand/04-elasticity.md +248 -0
- data/F24Textbook_MD/01-demand/index.md +15 -0
- data/F24Textbook_MD/02-supply/01-supply.md +203 -0
- data/F24Textbook_MD/02-supply/02-eep147-example.md +86 -0
- data/F24Textbook_MD/02-supply/03-sympy.md +138 -0
- data/F24Textbook_MD/02-supply/04-market-equilibria.md +204 -0
- data/F24Textbook_MD/02-supply/index.md +16 -0
- data/F24Textbook_MD/03-public/govt-intervention.md +73 -0
- data/F24Textbook_MD/03-public/index.md +10 -0
- data/F24Textbook_MD/03-public/surplus.md +351 -0
- data/F24Textbook_MD/03-public/taxes-subsidies.md +282 -0
- data/F24Textbook_MD/04-production/index.md +15 -0
- data/F24Textbook_MD/04-production/production.md +178 -0
- data/F24Textbook_MD/04-production/shifts.md +296 -0
- data/F24Textbook_MD/05-utility/budget-constraints.md +166 -0
- data/F24Textbook_MD/05-utility/index.md +15 -0
- data/F24Textbook_MD/05-utility/utility.md +136 -0
- data/F24Textbook_MD/06-inequality/historical-inequality.md +253 -0
- data/F24Textbook_MD/06-inequality/index.md +15 -0
- data/F24Textbook_MD/06-inequality/inequality.md +226 -0
- data/F24Textbook_MD/07-game-theory/bertrand.md +257 -0
- data/F24Textbook_MD/07-game-theory/cournot.md +333 -0
- data/F24Textbook_MD/07-game-theory/equilibria-oligopolies.md +96 -0
- data/F24Textbook_MD/07-game-theory/expected-utility.md +61 -0
- data/F24Textbook_MD/07-game-theory/index.md +19 -0
- data/F24Textbook_MD/07-game-theory/python-classes.md +340 -0
- data/F24Textbook_MD/08-development/index.md +35 -0
- data/F24Textbook_MD/09-macro/CentralBanks.md +101 -0
- data/F24Textbook_MD/09-macro/Indicators.md +77 -0
- data/F24Textbook_MD/09-macro/fiscal_policy.md +36 -0
- data/F24Textbook_MD/09-macro/index.md +14 -0
- data/F24Textbook_MD/09-macro/is_curve.md +76 -0
- data/F24Textbook_MD/09-macro/phillips_curve.md +70 -0
- data/F24Textbook_MD/10-finance/index.md +10 -0
- data/F24Textbook_MD/10-finance/options.md +178 -0
- data/F24Textbook_MD/10-finance/value-interest.md +60 -0
- data/F24Textbook_MD/11-econometrics/index.md +16 -0
- data/F24Textbook_MD/11-econometrics/multivariable.md +218 -0
- data/F24Textbook_MD/11-econometrics/reading-econ-papers.md +25 -0
- data/F24Textbook_MD/11-econometrics/single-variable.md +483 -0
- data/F24Textbook_MD/11-econometrics/statsmodels.md +58 -0
- data/F24Textbook_MD/12-environmental/KuznetsHypothesis-Copy1.md +187 -0
- data/F24Textbook_MD/12-environmental/KuznetsHypothesis.md +187 -0
- data/F24Textbook_MD/12-environmental/MAC.md +254 -0
- data/F24Textbook_MD/12-environmental/index.md +36 -0
- data/F24Textbook_MD/LICENSE.md +11 -0
- data/F24Textbook_MD/intro.md +26 -0
- data/F24Textbook_MD/references.md +25 -0
- data/F24Textbook_MD/summary.yaml +414 -0
- metadata +155 -0
@@ -0,0 +1,485 @@
|
|
1
|
+
---
|
2
|
+
title: "vibecession"
|
3
|
+
type: lecture-notebook
|
4
|
+
week: 15
|
5
|
+
source_path: "/Users/ericvandusen/Documents/Data88E-ForTraining/F24Lec_NBs/lec15/vibecession.ipynb"
|
6
|
+
---
|
7
|
+
|
8
|
+
## Vibecession - The Notebook!
|
9
|
+
The purpose of this notebook is to get the data and check the vibes behind the post-COVID drop in Consumer Sentiment
|
10
|
+
|
11
|
+
The concept of a vibecession, and testing it in a notebook, follows from a couple of key sources
|
12
|
+
- A Tweet by **Quantian** that [showed a test of the hypothesis](https://threadreaderapp.com/thread/1688397994821873664.html#google_vignette) - that this notebook is recreating
|
13
|
+
|
14
|
+
- Which was followed up by a FT recreation - for multiple countries - and how partisan this gap is ! You can read about it here [Should we believe Americans when they say the economy is bad? John Burn-Murdoch](https://www.ft.com/content/9c7931aa-4973-475e-9841-d7ebd54b0f47)
|
15
|
+
|
16
|
+
### But our starting point can be a more accessible commentator
|
17
|
+
|
18
|
+
|
19
|
+
[Kyla Scanlon](https://kylascanlon.com/), an online content creator and independent economics educator, coined the term "vibecession" to describe a phenomenon where public sentiment about the economy is overwhelmingly negative, despite relatively positive economic indicators like GDP growth and low unemployment rates. The term is a portmanteau of "vibes" and "recession," suggesting an economic downturn driven primarily by negative public sentiment rather than direct economic metrics.
|
20
|
+
|
21
|
+
The concept of the vibecession was introduced by Scanlon in a June 2022 newsletter, amidst observations that, while hard economic data was showing signs of stability and growth, the general sentiment among the public remained pessimistic. This disconnect, where the public feels economically insecure despite positive indicators, has been a central theme in Scanlon's discussions on platforms like TikTok, where she aims to make complex economic concepts more accessible and engaging to a broader audience, particularly younger people.
|
22
|
+
|
23
|
+
|
24
|
+
[Kyla Scanlon](https://www.marketplace.org/2022/09/07/for-tiktok-maker-kyla-scanlon-its-about-making-finance-fun-and-a-bit-chaotic/) on Marketplace
|
25
|
+
|
26
|
+
```python
|
27
|
+
import pandas as pd
|
28
|
+
import matplotlib.pyplot as plt
|
29
|
+
import statsmodels.api as sm
|
30
|
+
from sklearn.metrics import mean_absolute_error
|
31
|
+
import plotly.express as px
|
32
|
+
```
|
33
|
+
|
34
|
+
```python
|
35
|
+
# try to import fredapi
|
36
|
+
try:
|
37
|
+
from fredapi import Fred
|
38
|
+
except ImportError:
|
39
|
+
!pip install fredapi
|
40
|
+
from fredapi import Fred
|
41
|
+
|
42
|
+
# try to import yfinance
|
43
|
+
try:
|
44
|
+
import yfinance as yf
|
45
|
+
except ImportError:
|
46
|
+
!pip install yfinance
|
47
|
+
import yfinance as yf
|
48
|
+
fred = Fred(api_key='e3053cdc3e94dfb2b73c5945b0d1b1f7')
|
49
|
+
```
|
50
|
+
|
51
|
+
## Part 1 - Gather the Data
|
52
|
+
- The following sections are how we source the data
|
53
|
+
- This may be too boring and pedantic - but they are related to the learning outcomes of the class
|
54
|
+
- We will use a combination of the python package `fredapi` to download a few time series from the Federal Reserve Bank of St Louis
|
55
|
+
- And python package `yfinance` (derived from Yahoo Finance) to get additional series
|
56
|
+
- An enterprising student could add additional series to the modeling process
|
57
|
+
*Skip to Part 2 if you just want to see the results*
|
58
|
+
|
59
|
+
## Consumer Sentiment
|
60
|
+
|
61
|
+
The [University of Michigan Consumer Sentiment Index](http://www.sca.isr.umich.edu/) is an economic indicator that assesses the confidence, conditions, and expectations of U.S. consumers regarding their financial situation and the general state of the economy. The index is based on a monthly survey of approximately 500 households regarding their personal finances, business conditions, and buying conditions. It is divided into two parts: the Index of Consumer Expectations and the Current Economic Conditions Index.
|
62
|
+
|
63
|
+
This data is significant as it can provide insights into consumer behavior, which helps in predicting changes in spending and saving habits. Higher consumer confidence typically indicates that people feel secure in their personal financial situation and thus are more likely to increase spending, which is a key driver of economic growth.
|
64
|
+
|
65
|
+
The index is often used by analysts and policymakers to understand consumer sentiment and its potential impact on the economy. For instance, rising sentiment can suggest increased consumer spending and economic expansion, while declining sentiment might indicate economic slowdowns or recessions.
|
66
|
+
|
67
|
+
```python
|
68
|
+
# This is the data we want to model, we are going to get the UMCSENT series from FRED
|
69
|
+
UMCSENT = fred.get_series('UMCSENT', observation_start='1979-01-01', observation_end='2024-03-01')
|
70
|
+
UMCSENT
|
71
|
+
print(UMCSENT.index.tzinfo)
|
72
|
+
UMCSENT
|
73
|
+
```
|
74
|
+
|
75
|
+
Lets take a look at the data that we are trying to model
|
76
|
+
|
77
|
+
The time range is roughly the last 40 years
|
78
|
+
|
79
|
+
```python
|
80
|
+
fig = px.line(UMCSENT, title='Consumer Sentiment Index')
|
81
|
+
fig.update_yaxes(range=[0, 120])
|
82
|
+
fig.show()
|
83
|
+
```
|
84
|
+
|
85
|
+
### Get some Features eg Explanatory Variables for our Model
|
86
|
+
|
87
|
+
Now we want to search for some basic economics variables that could explain how Consumers are Feeling
|
88
|
+
|
89
|
+
Variables that were suggested by Quantian on Twitter were the following
|
90
|
+
|
91
|
+
- Inflation rate
|
92
|
+
- Inflation rate change
|
93
|
+
- Unemployment
|
94
|
+
- Unemployment change
|
95
|
+
- Housing prices
|
96
|
+
- Real wages
|
97
|
+
- Dollar strength
|
98
|
+
- Interest rates
|
99
|
+
- Stock prices
|
100
|
+
|
101
|
+
For the first set of Series we will go to FRED and download them according to the series name
|
102
|
+
|
103
|
+
```python
|
104
|
+
#Unemployment rate
|
105
|
+
UNRATE=fred.get_series('UNRATE', observation_start='1979-01-01', observation_end='2024-03-01')
|
106
|
+
#Inflation rate
|
107
|
+
CPIAUSCL= fred.get_series('CPIAUCSL', observation_start='1979-01-01', observation_end='2024-03-01')
|
108
|
+
#GDP
|
109
|
+
GDP=fred.get_series('GDP', observation_start='1979-01-01', observation_end='2024-03-01')
|
110
|
+
#housing price change
|
111
|
+
USSTHPI=fred.get_series('USSTHPI', observation_start='1979-01-01', observation_end='2024-03-01')
|
112
|
+
#interest rate
|
113
|
+
FEDFUNDSS=fred.get_series('FEDFUNDS', observation_start='1979-01-01', observation_end='2024-03-01')
|
114
|
+
#Real Wages
|
115
|
+
WAGES=fred.get_series('LES1252881600Q', observation_start='1979-01-01', observation_end='2024-03-01')
|
116
|
+
|
117
|
+
# The following Series ARE available on FRED but not with the time range we need
|
118
|
+
#S&P 500
|
119
|
+
#SP500=fred.get_series('SP500', observation_start='1979-01-01', observation_end='2024-03-01')
|
120
|
+
#Dollar index monthly
|
121
|
+
#DTWEXM=fred.get_series('DTWEXM', observation_start='1979-01-01', observation_end='2024-03-01')
|
122
|
+
# inflation change quarter to quarter
|
123
|
+
#INFCHANGE=fred.get_series('BPCCRO1Q156NBEA', observation_start='2019-01-01', observation_end='2024-03-01')
|
124
|
+
```
|
125
|
+
|
126
|
+
### Processing the data
|
127
|
+
|
128
|
+
- In the next few cells I will take a look at each series we have downloaded
|
129
|
+
- Some have to check the Time Zone - I am going to go with making them all "time zone naive"
|
130
|
+
- Some data are monthly, some are quarterly - I am going to adjust everything to monthly by filling in quarterly data for the first of each month in that quarter
|
131
|
+
|
132
|
+
```python
|
133
|
+
#Inflation rate
|
134
|
+
print(CPIAUSCL.index.tzinfo)
|
135
|
+
CPIAUSCL
|
136
|
+
```
|
137
|
+
|
138
|
+
```python
|
139
|
+
#housing price change index is quarterly, we need to resample it to monthly
|
140
|
+
USSTHPI=USSTHPI.resample('MS').ffill()
|
141
|
+
print(USSTHPI.index.tzinfo)
|
142
|
+
|
143
|
+
USSTHPI
|
144
|
+
```
|
145
|
+
|
146
|
+
```python
|
147
|
+
# remove this series
|
148
|
+
#DTWEXM=DTWEXM.resample('MS').ffill()
|
149
|
+
#print(DTWEXM.index.tzinfo)
|
150
|
+
|
151
|
+
#DTWEXM
|
152
|
+
```
|
153
|
+
|
154
|
+
```python
|
155
|
+
# Wage series is quarterly, we need to resample it to monthly
|
156
|
+
WAGES=WAGES.resample('MS').ffill()
|
157
|
+
print(WAGES.index.tzinfo)
|
158
|
+
|
159
|
+
WAGES
|
160
|
+
```
|
161
|
+
|
162
|
+
```python
|
163
|
+
#convert gdp to monthly by using the quarterly data
|
164
|
+
GDP=GDP.resample('MS').ffill()
|
165
|
+
print(GDP.index.tzinfo)
|
166
|
+
|
167
|
+
GDP
|
168
|
+
```
|
169
|
+
|
170
|
+
```python
|
171
|
+
# Make a new variable thats the percent change in CPI
|
172
|
+
cpichange = CPIAUSCL.pct_change()
|
173
|
+
print(cpichange.index.tzinfo)
|
174
|
+
|
175
|
+
cpichange
|
176
|
+
```
|
177
|
+
|
178
|
+
### For the next couple of series we can get a longer time series by going to YFinance
|
179
|
+
|
180
|
+
|
181
|
+
|
182
|
+
```python
|
183
|
+
#download S&P 500 data close price only
|
184
|
+
SP500 = yf.download('^GSPC', start='1979-01-01', end='2024-03-01')
|
185
|
+
#convert daily data to monthly data
|
186
|
+
SP500=SP500.resample('MS').mean()
|
187
|
+
#drop all columns except close price
|
188
|
+
SP500=SP500['Close']
|
189
|
+
#make time zone naive
|
190
|
+
SP500.index = SP500.index.tz_localize(None)
|
191
|
+
|
192
|
+
SP500
|
193
|
+
```
|
194
|
+
|
195
|
+
```python
|
196
|
+
# Get the Dollar Index data
|
197
|
+
DXY = yf.download('DX-Y.NYB', start='1979-01-01', end='2024-03-01')
|
198
|
+
DXY=DXY.resample('MS').mean()
|
199
|
+
DXY=DXY['Close']
|
200
|
+
DXY.index = DXY.index.tz_localize(None)
|
201
|
+
DXY
|
202
|
+
```
|
203
|
+
|
204
|
+
Now Lets Combine these series into a Dataframe called Vibes
|
205
|
+
- save it as a csv so we can skip all the data processing in the future
|
206
|
+
- Drop missing values for now (What gets dropped!?!) ( eg data before Nov 2023)
|
207
|
+
- check the data visually
|
208
|
+
|
209
|
+
```python
|
210
|
+
# create a dataframe using pd.concat
|
211
|
+
vibes = pd.concat([UMCSENT, UNRATE, CPIAUSCL, GDP, USSTHPI, FEDFUNDSS, SP500, DXY, cpichange,WAGES], axis=1)
|
212
|
+
vibes.columns = ['UMCSENT', 'UNEMPLOYMENT', 'CPI', 'GDP', 'HOUSINGPRICE', 'FEDFUNDS', 'SP500', 'DOLLAR', 'CPICHANGE','WAGES']
|
213
|
+
# drop any rows with missing values
|
214
|
+
vibes.dropna(inplace=True)
|
215
|
+
# save vibes to a csv file
|
216
|
+
vibes.to_csv('vibes.csv')
|
217
|
+
vibes
|
218
|
+
```
|
219
|
+
|
220
|
+
## Part 2 - Modeling Consumer Sentiment with Macroeconomic Data Series
|
221
|
+
|
222
|
+
In the next section we will be modeling consumer sentiment using the macroeconomic time series
|
223
|
+
- split up the data into before and after covid
|
224
|
+
- run a regression to predict consumer sentiment
|
225
|
+
- Compare predicted to actual outcomes
|
226
|
+
- Look at the residuals
|
227
|
+
- Show how to run the model in SKLearn instead of statsmodels
|
228
|
+
|
229
|
+
```python
|
230
|
+
#read in vibes.csv
|
231
|
+
vibes = pd.read_csv('vibes.csv', index_col=0)
|
232
|
+
# Y = the variable we want to predict, the Target in ML
|
233
|
+
# X = the explanatory variables we use to predict Y, or features in ML
|
234
|
+
X = vibes[['UNEMPLOYMENT', 'CPI', 'GDP', 'HOUSINGPRICE', 'FEDFUNDS', 'SP500', 'DOLLAR', 'CPICHANGE','WAGES']]
|
235
|
+
Y = vibes['UMCSENT'] # Make sure this is the correct column name for Consumer Sentiment
|
236
|
+
```
|
237
|
+
|
238
|
+
The first model will use Statsmodels to run a simple linear regression
|
239
|
+
|
240
|
+
*I know there are problems with this model specification!*
|
241
|
+
|
242
|
+
```python
|
243
|
+
X = sm.add_constant(X)
|
244
|
+
# Fit the model
|
245
|
+
model = sm.OLS(Y, X).fit()
|
246
|
+
# Print out the statistics
|
247
|
+
print(model.summary())
|
248
|
+
```
|
249
|
+
|
250
|
+
### Redo the Model as before and after COVID
|
251
|
+
|
252
|
+
The idea here is to train the model on data up until 2019 and then use it to predict 2020-2024
|
253
|
+
|
254
|
+
```python
|
255
|
+
# Split the data into training and testing sets before and after Dec 2019
|
256
|
+
vibes_train = vibes.loc[:'2019-12-01']
|
257
|
+
vibes_test = vibes.loc['2020-01-01':]
|
258
|
+
|
259
|
+
X_train = vibes_train[['UNEMPLOYMENT', 'CPI', 'GDP', 'HOUSINGPRICE', 'FEDFUNDS', 'SP500', 'DOLLAR', 'CPICHANGE','WAGES']]
|
260
|
+
Y_train = vibes_train['UMCSENT']
|
261
|
+
|
262
|
+
X_test = vibes_test[['UNEMPLOYMENT', 'CPI', 'GDP', 'HOUSINGPRICE', 'FEDFUNDS', 'SP500', 'DOLLAR', 'CPICHANGE','WAGES']]
|
263
|
+
Y_test = vibes_test['UMCSENT']
|
264
|
+
|
265
|
+
X_train = sm.add_constant(X_train)
|
266
|
+
X_test = sm.add_constant(X_test)
|
267
|
+
```
|
268
|
+
|
269
|
+
```python
|
270
|
+
# Fit the model on the training data
|
271
|
+
model = sm.OLS(Y_train, X_train).fit()
|
272
|
+
# Summary of the model
|
273
|
+
print(model.summary())
|
274
|
+
# Predict the test data
|
275
|
+
Y_pred = model.predict(X_test)
|
276
|
+
# Calculate the MAE
|
277
|
+
mae = mean_absolute_error(Y_test, Y_pred)
|
278
|
+
print(f'The Mean Absolute Error of the model is {mae}')
|
279
|
+
```
|
280
|
+
|
281
|
+
```python
|
282
|
+
#plot the actual vs predicted values over the testing data
|
283
|
+
plt.figure(figsize=(12, 6))
|
284
|
+
plt.plot(Y_train, label='Train')
|
285
|
+
plt.plot(Y_test, label='Test')
|
286
|
+
plt.plot(Y_pred, label='Predicted')
|
287
|
+
plt.ylabel('Consumer Sentiment Index')
|
288
|
+
plt.legend()
|
289
|
+
plt.show()
|
290
|
+
```
|
291
|
+
|
292
|
+
```python
|
293
|
+
# Plot the data and the model's prediction for the entire time period
|
294
|
+
#X = vibes[['UNEMPLOYMENT', 'CPI', 'GDP', 'HOUSINGPRICE', 'FEDFUNDS', 'SP500', 'DOLLAR', 'CPICHANGE','WAGES']]
|
295
|
+
#X = sm.add_constant(X)
|
296
|
+
#Y = vibes['UMCSENT']
|
297
|
+
Y_pred_all = model.predict(X)
|
298
|
+
plt.figure(figsize=(12, 6))
|
299
|
+
plt.plot(Y, label='Actual')
|
300
|
+
plt.plot(Y_pred_all, label='Predicted')
|
301
|
+
# add a line at March 2020
|
302
|
+
plt.axvline('2020-03-01', color='red', linestyle='--')
|
303
|
+
#Label the Y axis
|
304
|
+
plt.ylabel('Consumer Sentiment Index')
|
305
|
+
# label the x axis by every 5 years
|
306
|
+
plt.xticks(['1980-01-01', '1985-01-01', '1990-01-01', '1995-01-01', '2000-01-01', '2005-01-01', '2010-01-01', '2015-01-01', '2020-01-01'])
|
307
|
+
|
308
|
+
plt.legend()
|
309
|
+
plt.show()
|
310
|
+
```
|
311
|
+
|
312
|
+
```python
|
313
|
+
# Plot the data and the model's prediction for the entire time period using plotly
|
314
|
+
# Plot actual and predicted values for Y
|
315
|
+
fig = px.line(vibes, y=['UMCSENT'], title='Consumer Sentiment Index')
|
316
|
+
fig.add_scatter(x=vibes.index, y=Y_pred_all, mode='lines', name='Predicted')
|
317
|
+
fig.update_yaxes(range=[0, 120])
|
318
|
+
# add a line at March 2020
|
319
|
+
fig.add_vline(x='2020-03-01', line_dash='dash', line_color='red')
|
320
|
+
# add y axis label
|
321
|
+
fig.update_yaxes(title_text='Consumer Sentiment Index')
|
322
|
+
fig.show()
|
323
|
+
```
|
324
|
+
|
325
|
+
```python
|
326
|
+
#plot the actual vs predicted values
|
327
|
+
plt.plot(Y_test.index, Y_test, label='Actual')
|
328
|
+
plt.plot(Y_test.index, Y_pred, label='Predicted')
|
329
|
+
plt.legend()
|
330
|
+
plt.show()
|
331
|
+
```
|
332
|
+
|
333
|
+
```python
|
334
|
+
#plot the residuals
|
335
|
+
residuals = Y_test - Y_pred
|
336
|
+
plt.plot(Y_test.index, residuals)
|
337
|
+
plt.axhline(0, color='red', linestyle='--')
|
338
|
+
plt.show()
|
339
|
+
```
|
340
|
+
|
341
|
+
```python
|
342
|
+
#plot the residuals over the entire time period
|
343
|
+
|
344
|
+
residuals_all = Y - Y_pred_all
|
345
|
+
plt.plot(Y.index, residuals_all)
|
346
|
+
plt.axhline(0, color='red', linestyle='--')
|
347
|
+
# label the x axis by every 5 years
|
348
|
+
plt.xticks(['1980-01-01', '1985-01-01', '1990-01-01', '1995-01-01', '2000-01-01', '2005-01-01', '2010-01-01', '2015-01-01', '2020-01-01'])
|
349
|
+
|
350
|
+
plt.show()
|
351
|
+
```
|
352
|
+
|
353
|
+
```python
|
354
|
+
# Lets make a model using sklearn
|
355
|
+
from sklearn.linear_model import LinearRegression
|
356
|
+
from sklearn.model_selection import train_test_split
|
357
|
+
from sklearn.metrics import mean_absolute_error
|
358
|
+
```
|
359
|
+
|
360
|
+
```python
|
361
|
+
# SKlearn Linear Regression Model
|
362
|
+
# Create a linear regression model
|
363
|
+
lr = LinearRegression()
|
364
|
+
|
365
|
+
# Fit the model
|
366
|
+
lr.fit(X_train, Y_train)
|
367
|
+
|
368
|
+
# Predict on the test set
|
369
|
+
Y_pred = lr.predict(X_test)
|
370
|
+
|
371
|
+
# Calculate the MAE
|
372
|
+
mae = mean_absolute_error(Y_test, Y_pred)
|
373
|
+
print(f'The Mean Absolute Error of the model is {mae}')
|
374
|
+
|
375
|
+
# print the R^2 value
|
376
|
+
print(f'The R^2 value of the training model is {lr.score(X_train, Y_train)}')
|
377
|
+
print(f'The R^2 value of the model on test data is {lr.score(X_test, Y_test)}')
|
378
|
+
```
|
379
|
+
|
380
|
+
```python
|
381
|
+
#plot the actual vs predicted values
|
382
|
+
plt.plot(Y_test.index, Y_test, label='Actual')
|
383
|
+
plt.plot(Y_test.index, Y_pred, label='Predicted')
|
384
|
+
plt.legend()
|
385
|
+
plt.show()
|
386
|
+
```
|
387
|
+
|
388
|
+
```python
|
389
|
+
# Fit the model
|
390
|
+
model = LinearRegression()
|
391
|
+
model.fit(X_train, Y_train)
|
392
|
+
# Make predictions
|
393
|
+
y_pred = model.predict(X_test)
|
394
|
+
# Evaluate the model
|
395
|
+
mae = mean_absolute_error(Y_test, y_pred)
|
396
|
+
# Print the MAE
|
397
|
+
print(f'The Mean Absolute Error of the model is {mae}')
|
398
|
+
# print the R^2
|
399
|
+
print(f'The R^2 of the model on test data is {model.score(X_test, Y_test)}')
|
400
|
+
```
|
401
|
+
|
402
|
+
```python
|
403
|
+
#Plot y over time with years on the x-axis
|
404
|
+
plt.figure(figsize=(12,6))
|
405
|
+
plt.plot(Y.index, Y, label='Actual')
|
406
|
+
plt.plot(Y_test.index, y_pred, label='Predicted')
|
407
|
+
plt.ylabel('Consumer Sentiment Index')
|
408
|
+
plt.legend()
|
409
|
+
plt.show()
|
410
|
+
```
|
411
|
+
|
412
|
+
## SKlearn - Let's try a different ML model
|
413
|
+
Now that we have the ML model all set up, we can try a different ML model. One that has been suggested for time series for improved fits is the Random Forest Regressor
|
414
|
+
|
415
|
+
```python
|
416
|
+
from sklearn.ensemble import RandomForestRegressor
|
417
|
+
|
418
|
+
# Create the model
|
419
|
+
model = RandomForestRegressor(n_estimators=100, random_state=42)
|
420
|
+
|
421
|
+
# Fit the model
|
422
|
+
model.fit(X_train, Y_train)
|
423
|
+
|
424
|
+
# Predict on the test set
|
425
|
+
RFRpred = model.predict(X_test)
|
426
|
+
|
427
|
+
#Evaluate the model on training data
|
428
|
+
r2 = model.score(X_train, Y_train)
|
429
|
+
print("R-squared on training data :", r2)
|
430
|
+
|
431
|
+
# Calculate the MAE
|
432
|
+
mae = mean_absolute_error(Y_test, RFRpred)
|
433
|
+
print(f'The Mean Absolute Error of the model is {mae}')
|
434
|
+
|
435
|
+
# Evaluate the model (using R^2 score here)
|
436
|
+
r2 = model.score(X_test, Y_test)
|
437
|
+
print("R-squared on test data :", r2)
|
438
|
+
```
|
439
|
+
|
440
|
+
Let's look at the plots as we did for the regressions above
|
441
|
+
|
442
|
+
```python
|
443
|
+
#Plot y over time with years on the x-axis
|
444
|
+
plt.figure(figsize=(12,6))
|
445
|
+
plt.plot(Y.index, Y, label='Actual')
|
446
|
+
plt.plot(Y_test.index, RFRpred, label='Predicted')
|
447
|
+
plt.ylabel('Consumer Sentiment Index')
|
448
|
+
plt.legend()
|
449
|
+
plt.show()
|
450
|
+
```
|
451
|
+
|
452
|
+
```python
|
453
|
+
#Plot the Y_test and the predicted values
|
454
|
+
plt.plot(Y_test.index, Y_test, label='Actual')
|
455
|
+
plt.plot(Y_test.index, RFRpred, label='Predicted')
|
456
|
+
plt.legend()
|
457
|
+
plt.show()
|
458
|
+
```
|
459
|
+
|
460
|
+
```python
|
461
|
+
# plot the Y and the predicted values over the entire time period
|
462
|
+
RFRpred_all = model.predict(X)
|
463
|
+
plt.figure(figsize=(12, 6))
|
464
|
+
plt.plot(Y, label='Actual')
|
465
|
+
plt.plot(RFRpred_all, label='Predicted')
|
466
|
+
plt.ylabel('Consumer Sentiment Index')
|
467
|
+
plt.legend()
|
468
|
+
plt.show()
|
469
|
+
```
|
470
|
+
|
471
|
+
```python
|
472
|
+
# Plot the feature importances
|
473
|
+
importances = model.feature_importances_
|
474
|
+
plt.bar(X.columns, importances)
|
475
|
+
plt.ylabel('Importance')
|
476
|
+
plt.xticks(rotation=45)
|
477
|
+
plt.show()
|
478
|
+
```
|
479
|
+
|
480
|
+
|
481
|
+
|
482
|
+
```python
|
483
|
+
|
484
|
+
```
|
485
|
+
|