getfactormodels 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of getfactormodels might be problematic. Click here for more details.
- getfactormodels/__init__.py +43 -0
- getfactormodels/__main__.py +168 -0
- getfactormodels/models/__init__.py +24 -0
- getfactormodels/models/ff_models.py +141 -0
- getfactormodels/models/models.py +462 -0
- getfactormodels/utils/__init__.py +0 -0
- getfactormodels/utils/cli.py +28 -0
- getfactormodels/utils/utils.py +174 -0
- getfactormodels-0.0.1.dist-info/LICENSE +21 -0
- getfactormodels-0.0.1.dist-info/METADATA +234 -0
- getfactormodels-0.0.1.dist-info/RECORD +13 -0
- getfactormodels-0.0.1.dist-info/WHEEL +4 -0
- getfactormodels-0.0.1.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# MIT License
|
|
3
|
+
#
|
|
4
|
+
# Copyright (c) 2023 S. Martin
|
|
5
|
+
#
|
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
# furnished to do so, subject to the following conditions:
|
|
12
|
+
#
|
|
13
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
14
|
+
# copies or substantial portions of the Software.
|
|
15
|
+
#
|
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
22
|
+
# SOFTWARE.
|
|
23
|
+
__version__ = "0.0.1"
|
|
24
|
+
|
|
25
|
+
from .__main__ import FactorExtractor, get_factors
|
|
26
|
+
from .models import models # noqa: F401
|
|
27
|
+
from .models.models import (barillas_shanken_factors, carhart_factors,
|
|
28
|
+
dhs_factors, ff_factors, hml_devil_factors,
|
|
29
|
+
icr_factors, liquidity_factors, mispricing_factors,
|
|
30
|
+
q_classic_factors, q_factors)
|
|
31
|
+
|
|
32
|
+
__all__ = ["FactorExtractor",
|
|
33
|
+
"ff_factors",
|
|
34
|
+
"icr_factors",
|
|
35
|
+
"q_factors",
|
|
36
|
+
"q_classic_factors",
|
|
37
|
+
"mispricing_factors",
|
|
38
|
+
"dhs_factors",
|
|
39
|
+
"liquidity_factors",
|
|
40
|
+
"hml_devil_factors",
|
|
41
|
+
"barillas_shanken_factors",
|
|
42
|
+
"carhart_factors",
|
|
43
|
+
"get_factors", ]
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# ruff: noqa: F401
|
|
3
|
+
# TODO: rename __main__.py
|
|
4
|
+
import os
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from dateutil import parser
|
|
7
|
+
from getfactormodels.models.models import barillas_shanken_factors # noqa
|
|
8
|
+
from getfactormodels.models.models import (carhart_factors, dhs_factors,
|
|
9
|
+
ff_factors, hml_devil_factors,
|
|
10
|
+
icr_factors, liquidity_factors,
|
|
11
|
+
mispricing_factors,
|
|
12
|
+
q_classic_factors, q_factors)
|
|
13
|
+
from getfactormodels.utils.cli import parse_args
|
|
14
|
+
from getfactormodels.utils.utils import _get_model_key, _process
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def get_factors(model: str = "3",  # noqa: C901
                frequency: str = "M",
                start_date=None,
                end_date=None,
                output=None) -> pd.DataFrame:
    """Get data for a specified factor model.

    Return a DataFrame containing the data for the specified model and
    frequency. If an output is specified, factor data is saved to a file.

    Notes:
    - The model name is normalised with ``_get_model_key`` before dispatch;
      any string matching a model's regex (e.g., `liq` for `liquidity`) can
      be used as a model name.
    - Dates should be in ``YYYY-MM-DD`` format, but anything that
      ``dateutil.parser.parse()`` can interpret will work.
    - Weekly data is only available for the q-factor and Fama-French 3-factor
      models.

    Parameters:
        model (str): the factor model to return. One of: `liquidity`,
            `icr`, `dhs`, `q`, `q_classic`, `ff3`, `ff5`, `ff6`, `carhart4`,
            `hml_devil`, `barillas_shanken`, or `mispricing`.
        frequency (str): the frequency of the data. D, W, M or A (default: M).
        start_date (str, optional): the start date of the data, YYYY-MM-DD.
        end_date (str, optional): the end date of the data, YYYY-MM-DD.
        output (str, optional): a filename, directory, or filepath. Accepts
            '.txt', '.csv', '.md', '.xlsx', '.pkl' as file extensions.

    Returns:
        pandas.DataFrame: factor data, indexed by date.

    Raises:
        ValueError: if no ``<model>_factors`` function exists for the
            resolved model key.
    """
    frequency = frequency.lower()
    model = _get_model_key(model)

    # The Fama-French/Carhart family shares one entry point keyed by the
    # number of factors. `output` must be forwarded here as well, otherwise
    # the file is silently never written for these models.
    if model in ("3", "4", "5", "6"):
        return ff_factors(model, frequency, start_date, end_date, output)

    # Every other model is resolved by naming convention among the
    # `*_factors` functions imported into this module.
    function = globals().get(f"{model}_factors")
    if not function:
        raise ValueError(f"Invalid model: {model}")

    return function(frequency, start_date, end_date, output)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class FactorExtractor:
    """
    Fetch factor data for one model and optionally post-process or save it.

    Args:
        model (str, optional): The factor model to use. Defaults to '3'.
        frequency (str, optional): The frequency of the data. Defaults to 'M'.
        start_date (str, optional): The start date of the data.
        end_date (str, optional): The end date of the data.
        output (str, optional): Stored on the instance as ``output``; the
            methods below do not read it (``to_file`` takes its own filename).

    Methods:
        no_rf: Request that the 'RF' column be dropped when fetching.
        validate_date_format: Normalise a date string to YYYY-MM-DD.
        get_factors: Fetch the data and keep it on ``self.df``.
        drop_rf: Drops the 'RF' column from a DataFrame.
        to_file: Saves the fetched factor data to a file.
    """

    def __init__(self,
                 model='3',
                 frequency='M',
                 start_date=None,
                 end_date=None,
                 output=None):
        self.model: str = model
        self.frequency: str = frequency
        # Falsy dates (None, '') are stored as None; anything else is
        # normalised to a YYYY-MM-DD string.
        self.start_date = (None if not start_date
                           else self.validate_date_format(start_date))
        self.end_date = (None if not end_date
                         else self.validate_date_format(end_date))
        self.output = output
        self._no_rf = False
        self.df = None

    def no_rf(self):
        """Flag the extractor so `get_factors` drops the RF column."""
        self._no_rf = True

    @staticmethod
    def validate_date_format(date_string):
        """
        Parse a date string and return it formatted as YYYY-MM-DD.

        Raises:
            ValueError: If the date cannot be parsed.
        """
        try:
            parsed = parser.parse(date_string)
            return parsed.strftime("%Y-%m-%d")
        except ValueError as err:
            raise ValueError("Incorrect date format, use YYYY-MM-DD.") from err

    def get_factors(self) -> pd.DataFrame:
        """Fetch the factor data, store it on the instance and return it."""
        # Calls the module-level `get_factors`, not this method.
        fetched = get_factors(model=self.model,
                              frequency=self.frequency,
                              start_date=self.start_date,
                              end_date=self.end_date)
        self.df = fetched

        if self._no_rf:
            self.df = self.drop_rf(self.df)

        return self.df

    def drop_rf(self, df):
        """Return `df` without its ``RF`` column (a notice is printed if absent)."""
        if "RF" not in df.columns:
            print("`drop_rf` was called but no RF column was found.")
            return df

        return df.drop(columns=["RF"])

    def to_file(self, filename):
        """
        Save the previously fetched factor data to a file.

        Args:
            filename (str): The name of the file to save the data to.

        Raises:
            ValueError: If no data has been fetched yet.
        """
        if self.df is None:
            raise ValueError("No data to save. Fetch factors first.")

        # TODO: could call _save_to_file directly
        _process(self.df, filepath=filename)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def main():
    """Command-line entry point: fetch the requested factors, then save or print.

    Arguments come from `parse_args`. When an output path is given the data
    is written there and the absolute path is reported; otherwise the
    DataFrame is printed.
    """
    args = parse_args()

    extractor = FactorExtractor(model=args.model,
                                frequency=args.freq,
                                start_date=args.start,
                                end_date=args.end)
    if args.no_rf:
        extractor.no_rf()

    df = extractor.get_factors()

    if not args.output:
        print(df)
        return

    extractor.to_file(args.output)
    print(f'File saved to "{os.path.abspath(args.output)}"')
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
if __name__ == '__main__':
|
|
168
|
+
main()
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# MIT License
|
|
3
|
+
#
|
|
4
|
+
# Copyright (c) 2023 S. Martin
|
|
5
|
+
#
|
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
# furnished to do so, subject to the following conditions:
|
|
12
|
+
#
|
|
13
|
+
# The above copyright notice and this permission notice shall be included in
|
|
14
|
+
# all copies or substantial portions of the Software.
|
|
15
|
+
#
|
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
22
|
+
# SOFTWARE.
|
|
23
|
+
from . import ff_models # noqa: F401 - TODO: disable 401 in all __init__
|
|
24
|
+
from . import models # noqa: F401
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import numpy as np
|
|
3
|
+
import pandas as pd # noqa: D100
|
|
4
|
+
from ..utils.utils import ( # noqa - todo: fix relative import from parent modules banned
|
|
5
|
+
_process, get_zip_from_url)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _ff_construct_url(model="3", frequency="M"):
|
|
9
|
+
"""Construct and return the URL for the specified model and frequency."""
|
|
10
|
+
frequency = frequency.upper()
|
|
11
|
+
|
|
12
|
+
if frequency == "W" and model not in ["3", "4"]: # why 4?
|
|
13
|
+
raise ValueError("Weekly data is only available for the Fama \
|
|
14
|
+
French 3 factor model at the moment.")
|
|
15
|
+
|
|
16
|
+
base_url = "https://mba.tuck.dartmouth.edu"
|
|
17
|
+
ftp = "pages/faculty/ken.french/ftp"
|
|
18
|
+
|
|
19
|
+
file = f'F-F_{"Research_Data_" if model in ["3", "4", "5", "6"] else ""}'
|
|
20
|
+
file += ("Factors" if model in ["3", "4"]
|
|
21
|
+
else "5_Factors_2x3" if model in ["5", "6"]
|
|
22
|
+
else "")
|
|
23
|
+
file += "_daily" if frequency == "D" \
|
|
24
|
+
else "_weekly" if frequency == "W" else ""
|
|
25
|
+
file += "_CSV.zip"
|
|
26
|
+
|
|
27
|
+
return f"{base_url}/{ftp}/{file}"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def ff_read_csv_from_zip(zip_file, model=None):
    """Read the first CSV member of a Fama-French zip into a DataFrame.

    The number of leading lines skipped depends on the member's name:
    12 when it contains 'momentum', 3 when it contains 'ly', otherwise 2.
    The last line of the file is always dropped as a footer.

    Parameters:
        zip_file: an open zip archive exposing ``namelist()`` and ``open()``.
        model: accepted for call compatibility; not used.

    Returns:
        pandas.DataFrame with a string index named "date" and rows containing
        missing values removed, or None if anything goes wrong (the error is
        printed).
    """
    try:
        member = zip_file.namelist()[0]
        lowered = member.lower()

        if 'momentum' in lowered:
            rows_to_skip = 12
        elif 'ly' in lowered:
            rows_to_skip = 3
        else:
            rows_to_skip = 2

        with zip_file.open(member) as handle:
            frame = pd.read_csv(handle,
                                skiprows=rows_to_skip,
                                index_col=0,
                                header=0,
                                parse_dates=False,
                                skipfooter=1,
                                engine="python")

        # Date keys are kept as stripped strings; callers later tell the
        # frequency of a row apart by the length of its key.
        frame.index = frame.index.astype(str).str.strip()
        frame.index.name = "date"
        frame = frame.dropna()
    except Exception as e:
        print(f"Error reading file: {e}")
        return None
    return frame
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def ff_process_data(data, model, frequency) -> pd.DataFrame:
    """Select the rows for one frequency and convert the index to dates.

    Rows are told apart by the length of their string date key: 6 characters
    (YYYYMM) for monthly, 4 (YYYY) for annual, and 8 (YYYYMMDD) for every
    other frequency. Monthly and annual rows are stamped with the last day
    of their period.

    Parameters:
        data (pandas.DataFrame): frame whose index holds string date keys.
        model: accepted for call compatibility; not used.
        frequency (str): case-insensitive. ``M`` is monthly, ``Y`` (or its
            alias ``A``) is annual; anything else is parsed as YYYYMMDD.

    Returns:
        pandas.DataFrame: the selected rows with a datetime index named
        "date".

    Raises:
        ValueError: if a selected key does not match the expected format.
    """
    frequency = frequency.lower()
    if frequency == "a":
        # 'A' is the annual code documented for the public API; treat it
        # the same as 'Y' instead of falling through to the daily branch.
        frequency = "y"

    key_length = {"m": 6, "y": 4}.get(frequency, 8)
    data = data[data.index.str.len() == key_length]

    # Each frequency is parsed with its own explicit format. A blanket
    # `except` fallback to the annual format would hide genuine parse errors
    # in monthly/daily data.
    if frequency == "m":
        data.index = pd.to_datetime(data.index, format="%Y%m") \
            + pd.offsets.MonthEnd(0)
    elif frequency == "y":
        data.index = pd.to_datetime(data.index, format="%Y") \
            + pd.offsets.YearEnd(0, month=12)
    else:
        data.index = pd.to_datetime(data.index, format="%Y%m%d")

    data.index.name = "date"

    return data
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _ff_get_mom(frequency) -> pd.DataFrame:
    """Fetch and return the momentum factor data as a one-column DataFrame.

    * Note: only for returning the raw data for the 4 and 6 factor models.

    Parameters:
        frequency (str): case-insensitive. ``D`` downloads the daily file;
            every other value downloads the un-suffixed momentum file.

    Returns:
        pandas.DataFrame: a single ``MOM`` column indexed by stripped string
        date keys, with the index named "date".

    Raises:
        ValueError: if the downloaded archive could not be read.
    """
    frequency = frequency.upper()
    base_url = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp"
    file = "F-F_Momentum_Factor_daily_CSV.zip" if frequency == "D" \
        else "F-F_Momentum_Factor_CSV.zip"
    url = f"{base_url}/{file}"

    mom = ff_read_csv_from_zip(get_zip_from_url(url))
    if mom is None:
        # The reader reports failures by returning None; fail here with a
        # clear message rather than an AttributeError on the next line.
        raise ValueError(f"Could not read momentum factor data from {url}")

    mom.columns = ["MOM"]
    mom.index = mom.index.astype(str).str.strip()
    mom.index.name = "date"

    return mom
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _numeric_or_unchanged(column):
    """Return `column` converted to a numeric dtype, or as-is if it cannot be."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


def _get_ff_factors(model: str = "3",
                    frequency: str = "M",
                    start_date=None,
                    end_date=None) -> pd.DataFrame:
    """Return the Fama French 3, 5, or 6, or Carhart 4 factor model data.

    Downloads the base factor file, joins the momentum factor for the 4- and
    6-factor models (named ``MOM`` for model 4 and ``UMD`` for model 6),
    converts the index to dates, and scales the values by 0.01.

    Parameters:
        model (str): "3", "4", "5" or "6".
        frequency (str): frequency code; None is treated as "M".
        start_date (optional): first date to keep.
        end_date (optional): last date to keep.

    Returns:
        pandas.DataFrame: the result of ``_process`` applied to the factor
        data.
    """
    if frequency is None:
        frequency = "M"
    url = _ff_construct_url(model, frequency)
    archive = get_zip_from_url(url)
    csv = ff_read_csv_from_zip(archive, model)

    if model in ("4", "6"):
        mom = _ff_get_mom(frequency)
        if model == "6":
            mom = mom.rename(columns={"MOM": "UMD"})
        csv = csv.join(mom, how="left")

    data = ff_process_data(csv, model, frequency)
    # `pd.to_numeric(errors='ignore')` is deprecated; convert column by
    # column and keep a column unchanged when it is not numeric.
    data = data.apply(_numeric_or_unchanged)

    if start_date is not None or end_date is not None:
        data = data.loc[start_date:end_date]

    data = data.dropna()

    data = np.multiply(data, 0.01)
    return _process(data, start_date, end_date)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# TODO: just redo all of this.
|
|
@@ -0,0 +1,462 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
models
|
|
4
|
+
=======
|
|
5
|
+
Functions for retrieving and processing multi-factor model data.
|
|
6
|
+
|
|
7
|
+
Functions for fetching data for a variety of factor models. The data can be
|
|
8
|
+
returned for different frequencies and for a specified date range. The function
|
|
9
|
+
supports a variety of model names that match specific regex patterns, including
|
|
10
|
+
'liquidity', 'icr', 'dhs', 'q', 'q_classic', 'ff3', 'ff5', 'ff6', 'carhart4',
|
|
11
|
+
'hml_devil', 'barillas_shanken', and 'mispricing'.
|
|
12
|
+
|
|
13
|
+
Functions
|
|
14
|
+
---------
|
|
15
|
+
- `ff_factors`: Retrieves data for a specified Fama-French or Carhart factor
|
|
16
|
+
model.
|
|
17
|
+
- `q_factors`: Retrieves the q-factor model data from global-q.org.
|
|
18
|
+
- `q_classic_factors`: Retrieves the original 4-factor "q" model of Hou, Xue,
|
|
19
|
+
and Zhang (2015).
|
|
20
|
+
- `dhs_factors`: Retrieves the Daniel-Hirshleifer-Sun Behavioural factors.
|
|
21
|
+
- `icr_factors`: Retrieves the He, Kelly, Manela (2017) ICR factors.
|
|
22
|
+
- `hml_devil_factors`: Retrieves the HML Devil factors from AQR.
|
|
23
|
+
- `barillas_shanken_factors`: Constructs the 6-factor model of Barillas and
|
|
24
|
+
Shanken.
|
|
25
|
+
- `carhart_factors`: Retrieves the Carhart 4-factor model data.
|
|
26
|
+
- `liquidity_factors`: Retrieves the Pastor-Stambaugh liquidity factors.
|
|
27
|
+
- `mispricing_factors`: Retrieves the Stambaugh-Yuan (201x) mispricing factors.
|
|
28
|
+
"""
|
|
29
|
+
import os
|
|
30
|
+
import pickle
|
|
31
|
+
from io import BytesIO
|
|
32
|
+
from typing import Optional, Union
|
|
33
|
+
import numpy as np
|
|
34
|
+
import pandas as pd
|
|
35
|
+
import requests
|
|
36
|
+
from getfactormodels.utils.utils import _process, get_file_from_url
|
|
37
|
+
from .ff_models import _get_ff_factors
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def ff_factors(model: str = "3",  # TODO: fix: _get_ff_factors filepath param
               frequency: str = "M",
               start_date: str = None,
               end_date: str = None,
               output: str = None) -> pd.DataFrame:
    """Get data for a specified Fama-French or Carhart factor model.

    Returns a DataFrame with the 3-factor (1993), 5-factor (2015), or
    6-factor (2018) model of Fama & French, or Carhart's (1997) 4-factor
    model. Data is available in daily, weekly, monthly, and annual
    frequencies. If an output is specified, saves the data to a file.

    Notes:
    - Only the 3-factor model offers weekly data.
    - Dates should be in ``YYYY-MM-DD`` format, but anything that
      ``dateutil.parser.parse()`` can interpret will work.

    Parameters:
        model (str, int): the Fama-French or Carhart factor data to return. 3,
            4, 5 or 6 (default: 3). Converted to a string before use.
        frequency (str): the frequency of the data. Accepts D, W, M or Y
            (default: M).
        start_date (str, optional): the start date of the data, as YYYY-MM-DD.
        end_date (str, optional): the end date of the data, as YYYY-MM-DD.
        output (str, optional): a filename, directory, or filepath. If no
            extension is provided, will output a '.csv'. Accepts '.txt',
            '.csv', '.md', '.xlsx', '.pkl'.

    Returns:
        pandas.DataFrame: factor data, indexed by date.
    """
    factors = _get_ff_factors(str(model), frequency, start_date, end_date)
    return _process(factors, start_date, end_date, filepath=output)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def liquidity_factors(frequency: str = "M",
                      start_date: str = None,
                      end_date: str = None,
                      output: str = None) -> pd.DataFrame:
    """Retrieve the Pastor-Stambaugh liquidity factors. Monthly data only.

    Parameters:
        frequency (str): must be 'M' (case-insensitive).
        start_date (str, optional): the start date of the data.
        end_date (str, optional): the end date of the data.
        output (str, optional): filepath handed to ``_process``.

    Returns:
        pandas.DataFrame: columns AGG_LIQ, INNOV_LIQ and TRADED_LIQ indexed
        by month-end date.

    Raises:
        ValueError: if `frequency` is not monthly.
    """
    if frequency.lower() != 'm':
        print('Liquidity factors are only available for monthly frequency.')
        raise ValueError("Frequency must be 'm'.")

    url = ('https://research.chicagobooth.edu/'
           '-/media/research/famamiller/data/liq_data_1962_2022.txt')

    # Get .csv here...
    stream = get_file_from_url(url)

    # The column names live on the last '%'-prefixed line, tab separated.
    commented = [line for line in stream.readlines() if line.startswith('%')]
    headers = commented[-1][1:].strip().split('\t')

    # Rewind so the parser sees the file from the start again.
    stream.seek(0)

    # ...read .csv here
    data = pd.read_csv(stream, sep='\\s+', names=headers,
                       comment='%', index_col=0)

    data.index.name = 'date'
    data.index = data.index.astype(str)

    column_names = {'Agg Liq.': 'AGG_LIQ',
                    'Innov Liq (eq8)': 'INNOV_LIQ',
                    'Traded Liq (LIQ_V)': 'TRADED_LIQ'}
    data = data.rename(columns=column_names)

    # The first 65 values in the traded liquidity series are -99.000000.
    data['TRADED_LIQ'] = data['TRADED_LIQ'].replace(-99.000000, 0)

    # Frequency is monthly at this point (guarded above): YYYYMM -> month end.
    month_start = pd.to_datetime(data.index, format='%Y%m')
    data.index = month_start + pd.offsets.MonthEnd(0)

    return _process(data, start_date, end_date, filepath=output)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def mispricing_factors(frequency: str = "M",
                       start_date: str = None,
                       end_date: str = None,
                       output: str = None) -> pd.DataFrame:
    """Retrieve the Stambaugh-Yuan mispricing factors. Daily and monthly.

    Parameters:
        frequency (str): 'D' or 'M', case-insensitive (default: M).
        start_date (str, optional): the start date of the data.
        end_date (str, optional): the end date of the data.
        output (str, optional): filepath handed to ``_process``.

    Returns:
        pandas.DataFrame: factor data indexed by date; ``SMB`` is renamed to
        ``SMB_SY`` and ``MKTRF`` to ``Mkt-RF``. Monthly rows are stamped with
        the month end.

    Raises:
        ValueError: if `frequency` is neither daily nor monthly.
    """
    # Normalise once. The file choice and date parsing below must use the
    # same lower-cased value the validation uses, otherwise the default "M"
    # skips date conversion and "D" downloads the monthly file.
    frequency = frequency.lower()

    if frequency not in ("d", "m"):
        print("Mispricing factors are only available for daily and monthly "
              "frequency.")
        raise ValueError("Frequency must be 'd' or 'm'.")

    file = "M4d" if frequency == "d" else "M4"
    url = f"https://finance.wharton.upenn.edu/~stambaug/{file}.csv"

    data = get_file_from_url(url)

    # This is the only model read with the pyarrow engine.
    data = pd.read_csv(data, index_col=0, parse_dates=False,
                       date_format="%Y%m%d", engine="pyarrow")

    data = data.rename(columns={"SMB": "SMB_SY",
                                "MKTRF": "Mkt-RF"}).rename_axis("date")

    if frequency == "d":
        data.index = pd.to_datetime(data.index, format="%Y%m%d")
    else:
        data.index = pd.to_datetime(data.index, format="%Y%m")
        data.index = data.index + pd.offsets.MonthEnd(0)

    return _process(data, start_date, end_date, filepath=output)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def q_factors(frequency: str = "M",
              start_date: str = None,
              end_date: str = None,
              output: str = None,
              classic: bool = False) -> pd.DataFrame:
    """Retrieve the q-factor model data.

    Parameters:
        frequency (str): D, W, M, Q or Y, case-insensitive (default: M).
            ``A`` is accepted as an alias for ``Y``.
        start_date (str, optional): the start date of the data.
        end_date (str, optional): the end date of the data.
        output (str, optional): filepath handed to ``_process``.
        classic (bool): if True, drop the ``R_EG`` column, leaving the
            four-factor "classic" model.

    Returns:
        pandas.DataFrame: factor data indexed by date, values scaled by 0.01,
        with upper-cased column names (``R_F`` becomes ``RF`` and ``R_MKT``
        becomes ``Mkt-RF``).

    Raises:
        ValueError: if `frequency` is not one of the supported codes.
    """
    frequency = frequency.upper()
    if frequency == "A":
        frequency = "Y"

    file = {"M": "monthly",
            "D": "daily",
            "Q": "quarterly",
            "W": "weekly",
            "Y": "annual", }.get(frequency)

    if file is None:
        # Fail before building a URL that would contain the text "None".
        raise ValueError("Frequency must be one of 'D', 'W', 'M', 'Q' or 'Y'.")

    base_url = 'https://global-q.org/uploads'
    url = f"{base_url}/1/2/2/6/122679606/q5_factors_{file}_2022.csv"

    # Monthly and quarterly files carry the date in two leading columns
    # (year plus month/quarter); the others use a single date column.
    index_cols = [0, 1] if frequency in ["M", "Q"] else [0]
    data = pd.read_csv(
        url, parse_dates=False, index_col=index_cols, float_precision="high")

    if classic:
        data = data.drop(columns=["R_EG"])

    data = data.rename(columns={"R_F": "RF"})

    data = np.multiply(data, 0.01)

    if frequency in ["M", "Q"]:
        # Need to insert "-" (monthly) or "Q" (quarterly) into date str.
        data = data.reset_index()
        col = "quarter" if frequency == "Q" else "month"
        char = "Q" if frequency == "Q" else "-"

        data["date"] = pd.PeriodIndex(
            data["year"].astype(str)
            + char
            + data[col].astype(str), freq=frequency
        ).to_timestamp(how="end")

        # Period ends carry a time of day; keep only the calendar date.
        data["date"] = data["date"].dt.normalize()
        data = data.drop(["year", col], axis=1).set_index("date")

    if frequency == "Y":
        data.index = pd.to_datetime(data.index.astype(str)) \
            + pd.offsets.YearEnd(0)
    else:
        data.index = pd.to_datetime(data.index.astype(str))

    data.columns = data.columns.str.upper()
    data.index.name = "date"
    data = data.rename(columns={"R_MKT": "Mkt-RF"})

    return _process(data, start_date, end_date, filepath=output)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
# Daniel-Hirshleifer-Sun Behavioural Factors
|
|
207
|
+
def dhs_factors(frequency: str = "M",
                start_date: str = None,
                end_date: str = None,
                output: str = None) -> pd.DataFrame:
    """Retrieve DHS factors from sheets on Lin Sun's website.

    The FIN and PEAD columns are read from the spreadsheet, scaled by 0.01,
    and combined with ``Mkt-RF`` and ``RF`` from the Fama-French 3-factor
    data over the same date span.

    Parameters:
        frequency (str): 'M' (monthly) or 'D' (daily), case-insensitive.
        start_date (str, optional): the start date of the data.
        end_date (str, optional): the end date of the data.
        output (str, optional): filepath handed to ``_process``.

    Returns:
        pandas.DataFrame: columns Mkt-RF, FIN, PEAD and RF, indexed by date.

    Raises:
        ValueError: if `frequency` is neither monthly nor daily.
        requests.HTTPError: if the spreadsheet download returns an error
            status.
    """
    frequency = frequency.lower()
    base_url = "https://docs.google.com/spreadsheets/d/"

    if frequency == "m":
        sheet = "1RxYLbCfk19m8fnniiJYfaj3yI55ZPaoi/export?format=xlsx"
    elif frequency == "d":
        sheet = "1KnCP-NVhf2Sni8bVFIVyMxW-vIljBOWE/export?format=xlsx"
    else:
        print("Frequency must be either 'M' (monthly) or 'D' (daily).")
        raise ValueError("Frequency must be 'M' or 'D'.")
    # TODO: use the link to the Google Sheet instead of the actual sheet.

    url = base_url + sheet

    response = requests.get(url, verify=True, timeout=20)
    # Stop on an HTTP error here, before an error page is handed to the
    # Excel parser and fails there with a misleading message.
    response.raise_for_status()
    workbook = BytesIO(response.content)

    data = pd.read_excel(workbook, index_col="Date",
                         usecols=['Date', 'FIN', 'PEAD'], engine='openpyxl',
                         header=0, parse_dates=False)
    data.index.name = "date"

    if frequency == "d":
        data.index = pd.to_datetime(data.index, format="%m/%d/%Y")
    else:
        data.index = pd.to_datetime(data.index, format="%Y%m")
        data.index = data.index + pd.offsets.MonthEnd(0)

    data = np.multiply(data, 0.01)  # Decimalize before FF factors!

    ff = _get_ff_factors(model="3", frequency=frequency,
                         start_date=data.index[0], end_date=data.index[-1])
    # Note: FF source data is to 4 decimals; re-rounding here to avoid
    # rounding errors (e.g., 0.02 --> 0.019999999999999997)
    ff = ff.round(4)
    data = pd.concat([ff["Mkt-RF"], data, ff["RF"]], axis=1)
    data.index.name = "date"

    return _process(data, start_date, end_date, filepath=output)
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def icr_factors(frequency: str = "M",
                start_date: str = None,
                end_date: str = None,
                output: str = None) -> pd.DataFrame:
    """Retrieve the He, Kelly, Manela (2017) ICR factors.

    * Daily since 1999-05-03; quarterly and monthly since 1970.

    Parameters:
        frequency (str): 'D', 'M' or 'Q', case-insensitive (default: M).
        start_date (str, optional): the start date of the data.
        end_date (str, optional): the end date of the data.
        output (str, optional): filepath handed to ``_process``.

    Returns:
        pandas.DataFrame: factor data indexed by date, with the source
        columns renamed to IC_RATIO, IC_RISK_FACTOR, INT_LEV_RATIO_SQ and
        INT_VW_ROI. Monthly and quarterly rows are stamped with the end of
        their period.

    Raises:
        ValueError: if `frequency` is not one of the supported codes.
    """
    # TODO: Do we need Mkt-RF and RF [seen referred to as 2-factor model]?
    frequency = frequency.lower()

    file_by_frequency = {"d": "daily", "m": "monthly", "q": "quarterly"}
    if frequency not in file_by_frequency:
        raise ValueError("Frequency must be 'd', 'm' or 'q'.")

    base_url = "https://voices.uchicago.edu/zhiguohe"
    file = file_by_frequency.get(frequency)
    url = f"{base_url}/files/2023/10/He_Kelly_Manela_Factors_{file}.csv"

    df = pd.read_csv(get_file_from_url(url))
    # Whatever the first column is called in the file, it holds the date.
    df = df.rename(columns={df.columns[0]: "date"})

    # Just doing dates here for now...
    if frequency == "q":
        # The dates are YYYYQ. [19752 -> 1975Q2]
        raw = df["date"].astype(str)
        df["date"] = raw
        df["date"] = raw.str[:-1] + "Q" + raw.str[-1]
        quarter_start = pd.PeriodIndex(df["date"], freq="Q").to_timestamp()
        df["date"] = quarter_start + pd.offsets.QuarterEnd(0)

    short_names = {
        "intermediary_capital_ratio": "IC_RATIO",
        "intermediary_capital_risk_factor": "IC_RISK_FACTOR",
        "intermediary_leverage_ratio_squared": "INT_LEV_RATIO_SQ",
        "intermediary_value_weighted_investment_return": "INT_VW_ROI",
    }
    df = df.rename(columns=short_names)

    if frequency == "m":
        month_start = pd.to_datetime(df["date"], format="%Y%m")
        df["date"] = month_start
        df["date"] = df["date"] + pd.offsets.MonthEnd(0)
    elif frequency == "d":
        df["date"] = pd.to_datetime(df["date"], format="%Y%m%d")

    df = df.set_index("date")

    return _process(df, start_date, end_date, filepath=output)
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def q_classic_factors(frequency: str = "M",
                      start_date: str = None,
                      end_date: str = None,
                      output: str = None) -> pd.DataFrame:
    """Retrieve the classic q-factor model of Hou, Xue, and Zhang (2015).

    Thin wrapper around `q_factors` with ``classic=True``; all other
    arguments are passed through unchanged.
    """
    return q_factors(frequency=frequency,
                     start_date=start_date,
                     end_date=end_date,
                     output=output,
                     classic=True)
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def carhart_factors(frequency: str = "M",
                    start_date: str = None,
                    end_date: str = None,
                    output: str = None) -> pd.DataFrame:
    """Retrieve the Carhart 4-factor model data.

    Fetches model '4' through `_get_ff_factors` and passes the result to
    `_process` together with the date range and output path.
    """
    carhart = _get_ff_factors(model='4',
                              frequency=frequency,
                              start_date=start_date,
                              end_date=end_date)
    return _process(carhart, start_date, end_date, filepath=output)
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def _create_cache():
|
|
321
|
+
cache_dir = os.path.expanduser('~/.cache/getfactormodels')
|
|
322
|
+
if not os.path.exists(cache_dir):
|
|
323
|
+
os.makedirs(cache_dir)
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def _hml_devil_cache_is_fresh(file_date, current_date, daily):
    """Return True when a cache file written at ``file_date`` is still usable.

    Daily data stays valid for the calendar day it was written on, any other
    frequency for the calendar month. The year is part of the comparison, so
    a file left over from the same day-of-month (or month) of an earlier
    period is correctly treated as expired.
    """
    if daily:
        return file_date.date() == current_date.date()
    return (file_date.year, file_date.month) == (current_date.year,
                                                 current_date.month)


def _download_hml_devil(daily):
    """Download the AQR workbook and return the USA factors as one DataFrame.

    The returned frame has the columns HML_DEVIL, Mkt-RF, SMB, UMD and RF,
    all cast to float, indexed by whatever the first column of each sheet
    contains (parsed as dates by ``read_excel``).
    """
    base_url = 'https://www.aqr.com/-/media/AQR/Documents/Insights'
    file = 'daily' if daily else 'monthly'
    url = f'{base_url}/Data-Sets/The-Devil-in-HMLs-Details-Factors-{file}.xlsx'

    print('Downloading HML Devil factors from AQR... This can take a while. Please be patient or something.')  # noqa

    # TODO: A progress bar until something's figured out? download_with_progress # noqa
    # TODO: Handle interupts SIGINT, etc.
    response = requests.get(url, verify=True, timeout=180)
    # Fail here on an HTTP error instead of handing an error page to the
    # Excel parser and getting an unrelated parsing exception.
    response.raise_for_status()
    xls = pd.ExcelFile(BytesIO(response.content))

    sheet_names = ['HML Devil', 'MKT', 'SMB', 'UMD', 'RF']
    df_dict = pd.read_excel(xls,
                            sheet_name=sheet_names,
                            skiprows=18,
                            header=0,
                            index_col=0,
                            parse_dates=True)

    dfs = []
    for sheet_name in sheet_names:
        sheet = df_dict[sheet_name]
        # Use the 'USA' column, except for the RF sheet, where the first
        # data column is taken.
        # TODO: allow for other countries
        column = sheet.iloc[:, 0:1] if sheet_name == 'RF' else sheet[['USA']]
        column.columns = [sheet_name]
        dfs.append(column)

    data = pd.concat(dfs, axis=1)
    data = data.rename(columns={'MKT': 'Mkt-RF', 'HML Devil': 'HML_DEVIL'})
    return data.astype(float)


def hml_devil_factors(frequency='M',
                      start_date: Optional[str] = None,
                      end_date: Optional[str] = None,
                      output: Optional[str] = None,
                      series=False) -> Union[pd.Series, pd.DataFrame]:
    """***EXPERIMENTAL***

    Retrieve the HML Devil factors from AQR.com. [FIXME: Slow.]

    Notes:
    - The download is slow, so the parsed data is cached as a pickle in
      ``~/.cache/getfactormodels``. A daily cache is reused for the rest of
      the calendar day, any other cache for the rest of the calendar month.
    - The result does not depend on whether the data came from the cache:
      the index is always a ``DatetimeIndex`` named ``date``, ``output`` is
      always honoured, and rows containing NaN are dropped for daily data
      and whenever a DataFrame is returned.
    - A stale cache is only replaced after a successful download.

    Parameters:
        frequency (str): The frequency of the data. ``D`` selects the daily
            file; any other value selects the monthly file (default: M).
        start_date (str, optional): The start date of the data, YYYY-MM-DD.
        end_date (str, optional): The end date of the data, YYYY-MM-DD.
        output (str, optional): The filepath to save the output data. The
            full DataFrame is saved, even when ``series`` is True.
        series (bool, optional): If True, return only the HML Devil factor
            as a pandas Series.

    Returns:
        pd.DataFrame: the HML Devil model data indexed by date.
        pd.Series: the ``HML_DEVIL`` column when ``series`` is True.

    Raises:
        requests.HTTPError: if AQR answers with a 4xx/5xx status.
    """
    _create_cache()  # TODO: allow config and cache_file [not pickle]
    daily = frequency.lower() == 'd'
    pickle_file = os.path.expanduser(f'~/.cache/getfactormodels/hml_devil_{frequency}.pkl')  # noqa

    data = None
    if os.path.exists(pickle_file):
        # Both timestamps are naive local times, so the comparison cannot be
        # thrown off by the UTC offset around midnight.
        file_date = pd.Timestamp.fromtimestamp(os.path.getmtime(pickle_file))
        if _hml_devil_cache_is_fresh(file_date, pd.Timestamp.now(), daily):
            # NOTE(security): unpickling can execute arbitrary code. The file
            # lives in the user's own cache directory and is written only by
            # this function; never point this at an untrusted file.
            try:
                with open(pickle_file, 'rb') as f:
                    data = pickle.load(f)
            except (pickle.UnpicklingError, EOFError):
                # A truncated or corrupt cache is treated as a cache miss.
                data = None

    if data is None:
        data = _download_hml_devil(daily)
        with open(pickle_file, 'wb') as f:
            pickle.dump(data, f)

    # Normalise after loading as well as after downloading, so that caches
    # written by earlier versions produce the same result.
    data.index = pd.to_datetime(data.index)
    data.index.name = 'date'

    if daily or not series:
        data = data.dropna()

    result = _process(data, start_date, end_date, filepath=output)
    return result.HML_DEVIL if series else result
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def barillas_shanken_factors(frequency: str = 'M',
                             start_date: str = None,
                             end_date: str = None,
                             output: str = None) -> pd.DataFrame:
    """***Experimental.***

    Build the 6-factor model of Barillas and Shanken (2018), the model with
    the highest posterior inclusion probability in that paper.

    The factors are assembled from three sources and inner-joined on their
    date index:
    - ``R_IA`` and ``R_ROE`` from the classic q-factor model,
    - ``Mkt-RF``, ``SMB``, ``UMD`` and ``RF`` from ``ff_factors(model='6')``,
    - Asness and Frazzini's HML Devil factor, renamed to ``HML_m``.

    Note:
    - Relies on the HML Devil factors being retrieved (which is very slow).

    Returns:
        pd.DataFrame: A timeseries of the factor data.
    """
    inner_on_index = {'left_index': True, 'right_index': True, 'how': 'inner'}

    q_part = q_factors(frequency=frequency, classic=True)[['R_IA', 'R_ROE']]
    ff_columns = ['Mkt-RF', 'SMB', 'UMD', 'RF']
    ff_part = ff_factors(model='6', frequency=frequency)[ff_columns]
    combined = pd.merge(q_part, ff_part, **inner_on_index)

    hml_m = hml_devil_factors(frequency=frequency, start_date=start_date,
                              series=True).rename('HML_m')
    hml_m.index.name = 'date'
    combined = pd.merge(combined, hml_m, **inner_on_index)

    return _process(combined, start_date, end_date, filepath=output)
|
|
File without changes
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import argparse
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def parse_args():
    """Build the command-line parser and parse the process arguments.

    Returns:
        argparse.Namespace: with the attributes ``model`` (required),
        ``freq`` (default ``'M'``), ``start``, ``end``, ``output`` (all
        ``None`` when omitted) and ``no_rf`` (``False`` unless the flag is
        given).
    """
    parser = argparse.ArgumentParser(
        description='Retrieve and structure data for factor models.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # The epilog is printed verbatim (RawDescriptionHelpFormatter), so it
        # is assembled line by line and shows the installed command name.
        epilog=(
            'Example usage:\n'
            '    getfactormodels -m 3 -f M -s 1961-01-01 -e 1990-12-31\n'
            "    getfactormodels --model icr --frequency M --end 1990-12-31 --no_rf -o '~/icr.csv'"  # noqa
        ),
    )
    parser.add_argument('-m', '--model', type=str, required=True,
                        help='The model to use.')
    # Adjacent literals instead of a backslash continuation inside the
    # string, which would embed the source indentation in the help text.
    parser.add_argument('-f', '--freq', '--frequency', type=str,
                        required=False, default='M',
                        help='The frequency of the data. '
                             'Valid options are D, W, M, Q, A.')
    parser.add_argument('-s', '--start', type=str, required=False,
                        help='The start date for the data.')
    parser.add_argument('-e', '--end', type=str, required=False,
                        help='The end date for the data.')
    parser.add_argument('-o', '--output', type=str, required=False,
                        help='The file to save the data to.')
    parser.add_argument('--no_rf', '--no-rf', '--norf', action='store_true',
                        help='Drop the RF column from the DataFrame.')
    return parser.parse_args()
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import re
|
|
3
|
+
import zipfile as zip
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from io import BytesIO, StringIO
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from types import MappingProxyType
|
|
8
|
+
import pandas as pd
|
|
9
|
+
import requests
|
|
10
|
+
from dateutil import parser
|
|
11
|
+
|
|
12
|
+
# Canonical model key -> case-insensitive pattern of accepted spellings.
# Patterns are applied with ``re.match``, i.e. anchored at the start of the
# input only, so several of them accept any input with a matching prefix.
__model_input_map = MappingProxyType({
    "3": r"\b((f?)f)?3\b|(ff)?1993",
    "5": r"\b(ff)?5|ff2015\b",
    "4": r"\b(c(ar(hart)?)?4?|ff4|carhart1997|4)\b",
    "6": r"\b(ff)?6|ff2018\b",
    "q": r"\b(q(5)?|hmxz)\b",
    "q_classic": r"\b(q4|q(_)?classic)|classic_q\b",
    "mispricing": r"\b(sy4?|mispricing)|misp|yuan$|m4|mis|sy\b",
    "liquidity": r"^(il)?liq(uidity)?|(pastor|ps|sp)$",
    "icr": r"\bicr|hkm\b",
    "dhs": r"^(\bdhs\b|behav.*)$",
    "hml_devil": r"\bhml(_)?d(evil)?\b",
    "barillas_shanken": r"\b(bs|bs6|barillas|shanken)\b", })


def _get_model_key(model):
    """
    Convert a model name to a model key.
    * This provides more flexibility in input by converting various model names
    to a standardized model key.
    * Surrounding whitespace is ignored and a canonical key is always
    accepted as itself (case-insensitively). Without that shortcut
    ``'barillas_shanken'`` would be rejected, because ``\\b`` cannot match
    between ``barillas`` and the underscore that follows it.

    Raises ``ValueError`` when no pattern matches.

    >>> _get_model_key('ff1993')
    '3'
    >>> _get_model_key('liQ')
    'liquidity'
    >>> _get_model_key('q4_factors')
    'q_classic'
    >>> _get_model_key('ICR')
    'icr'
    >>> _get_model_key('barillas_shanken')
    'barillas_shanken'
    """
    model = str(model)
    candidate = model.strip()

    canonical = candidate.lower()
    if canonical in __model_input_map:
        return canonical

    # First matching pattern wins; the mapping's insertion order matters.
    for key, regex in __model_input_map.items():
        if re.match(regex, candidate, re.I):
            return key
    raise ValueError(f'Invalid model: {model}')
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def get_file_from_url(url):
    """Fetch ``url`` and return its body, decoded as UTF-8, in a StringIO.

    The request uses certificate verification and a 15 second timeout;
    ``raise_for_status`` is called on the response before it is decoded.
    """
    reply = requests.get(url, verify=True, timeout=15)
    reply.raise_for_status()
    return StringIO(reply.content.decode('utf-8'))
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def get_zip_from_url(url):
    """Download the zip archive at ``url`` and return it as a ZipFile.

    Any failure while downloading (including a keyboard interrupt) is
    reported on stdout and then re-raised unchanged. The archive is opened
    from memory; nothing is written to disk.
    """
    try:
        reply = requests.get(url, timeout=15)
        reply.raise_for_status()
        archive_bytes = reply.content
    except (KeyboardInterrupt, Exception) as e:
        print(f"An error occurred downloading the zip file from {url}: {e}")
        raise

    in_memory = BytesIO(archive_bytes)
    return zip.ZipFile(in_memory)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _save_to_file(data, filename=None, output_dir=None):
    """Save a pandas DataFrame or Series to a file.

    The format is chosen from the file extension, case-insensitively:
    ``.csv``, ``.txt`` (tab separated), ``.xlsx``, ``.pkl`` or ``.md``.

    Parameters:
        data: the DataFrame or Series to save.
        filename (str, optional): name of the file. ``None`` yields
            ``<today>.csv``; a bare extension such as ``'.xlsx'`` yields
            ``<today>.xlsx``; a name without any dot gets ``.csv`` appended.
        output_dir (optional): target directory, ``~`` is expanded. Defaults
            to the current working directory.

    Raises:
        ValueError: if ``data`` is not a DataFrame/Series or the extension
            is not supported.
    """
    if not isinstance(data, (pd.DataFrame, pd.Series)):
        raise ValueError('Data is not a pandas DataFrame or Series')

    writers = {
        '.txt': lambda path: data.to_csv(path, sep='\t'),
        '.csv': data.to_csv,
        '.xlsx': data.to_excel,  # TODO: style with writer
        '.pkl': data.to_pickle,
        '.md': data.to_markdown, }

    today = datetime.now().strftime('%Y-%m-%d')
    if filename is None:
        filename = today + '.csv'
    elif filename.lower() in writers:
        # Only an extension was given: generate the name, keep the format.
        # (Path('.xlsx').suffix is empty, so this must be handled up front.)
        filename = today + filename
    elif '.' not in filename:
        filename += '.csv'

    # If no output directory is provided, use the current working directory;
    # otherwise expand the '~' character in it.
    if output_dir is None:
        output_dir = Path.cwd()
    else:
        output_dir = Path(output_dir).expanduser()

    target = output_dir / filename

    # Validate the extension before announcing an overwrite.
    writer = writers.get(target.suffix.lower())
    if writer is None:
        raise ValueError('Unsupported file extension')

    if target.is_file():
        print('File exists: overwriting...')

    writer(str(target))
    print(f"File saved to: {target}")
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _rearrange_cols(data):
    """Return ``data`` with 'Mkt-RF' as first and 'RF' as last column.

    Either column is only moved when present; all other columns keep their
    relative order. A Series is returned unchanged.
    """
    # [TODO] ICR model has no RF or Mkt Excess return column
    if isinstance(data, pd.Series):
        return data

    ordered = list(data.columns)
    if 'Mkt-RF' in ordered:
        ordered.remove('Mkt-RF')
        ordered.insert(0, 'Mkt-RF')
    if 'RF' in ordered:
        ordered.remove('RF')
        ordered.append('RF')
    return data.loc[:, ordered]
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _validate_date(date_str):
    """Normalise a date to a ``YYYY-MM-DD`` string.

    Parameters:
        date_str: ``None``, a ``datetime`` (which includes ``pd.Timestamp``,
            a ``datetime`` subclass) or a string that
            ``dateutil.parser.parse`` understands.

    Returns:
        ``None`` for ``None``, otherwise the date formatted as YYYY-MM-DD.

    Raises:
        ValueError: if the value cannot be interpreted as a date. Every
            failure (unparseable text, out-of-range values, unsupported
            input types) is reported as this one exception type.
    """
    if date_str is None:
        return None
    try:
        if isinstance(date_str, datetime):
            return date_str.strftime("%Y-%m-%d")
        return parser.parse(date_str).strftime("%Y-%m-%d")
    except (ValueError, OverflowError, TypeError) as err:
        raise ValueError("Incorrect date format, use YYYY-MM-DD.") from err
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _slice_dates(data, start_date=None, end_date=None):
    """Return the rows of ``data`` between ``start_date`` and ``end_date``.

    With no bounds at all, ``data`` itself is returned. Otherwise each given
    bound is normalised by ``_validate_date`` and the data is label-sliced
    with ``.loc``; a missing bound leaves that side open.
    """
    if start_date is None and end_date is None:
        return data

    lower = None if start_date is None else _validate_date(start_date)
    upper = None if end_date is None else _validate_date(end_date)
    return data.loc[lower:upper]
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _process(data, start_date=None, end_date=None, filepath=None):
    """Process the data and optionally save it to a file.

    The columns are reordered by ``_rearrange_cols`` and the rows limited to
    the requested date range by ``_slice_dates``.

    * filepath: takes a filename, path or directory. ``~`` is expanded. If it
      names an existing directory, the file is written inside it under the
      default name chosen by ``_save_to_file``; otherwise the last path
      component is the file name and the rest the target directory.

    Returns the processed data.
    """
    data = _rearrange_cols(data)
    data = _slice_dates(data, start_date, end_date)

    if filepath:
        # Convert the filepath to a Path object and expand the '~' character
        filepath = Path(filepath).expanduser()

        if filepath.is_dir():
            # Without this, a directory would be split into parent/name and
            # saved as '<name>.csv' next to the directory instead of in it.
            _save_to_file(data, None, filepath)
        else:
            _save_to_file(data, filepath.name, filepath.parent)

    return data
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2023 S. Martin
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: getfactormodels
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Retrieve data for various multifactor asset pricing models.
|
|
5
|
+
Keywords: finance,pricing models,financial analysis,econometrics,asset pricing,multifactor models
|
|
6
|
+
Author-email: "S. Martin" <x512@pm.me>
|
|
7
|
+
Requires-Python: >=3.7
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
Classifier: Topic :: Office/Business :: Financial
|
|
10
|
+
Classifier: Topic :: Office/Business :: Financial :: Investment
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Environment :: Console
|
|
17
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
18
|
+
Classifier: Intended Audience :: Financial and Insurance Industry
|
|
19
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
20
|
+
Requires-Dist: numpy >=1.18.5
|
|
21
|
+
Requires-Dist: pandas >=1.4
|
|
22
|
+
Requires-Dist: requests >=2.20.0
|
|
23
|
+
Requires-Dist: pyarrow >=14.0.1
|
|
24
|
+
Requires-Dist: openpyxl >=3.0.3
|
|
25
|
+
Requires-Dist: tabulate >=0.8.7
|
|
26
|
+
Requires-Dist: ruff ; extra == "dev"
|
|
27
|
+
Requires-Dist: pytest-cov ; extra == "dev"
|
|
28
|
+
Requires-Dist: pytest ; extra == "dev"
|
|
29
|
+
Requires-Dist: pytest-randomly ; extra == "dev"
|
|
30
|
+
Requires-Dist: isort ; extra == "dev"
|
|
31
|
+
Requires-Dist: nox ; extra == "dev"
|
|
32
|
+
Project-URL: Homepage, https://github.com/x512/getfactormodels
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
|
|
35
|
+
<a name="readme-top"></a>
|
|
36
|
+
|
|
37
|
+
# getfactormodels
|
|
38
|
+
|
|
39
|
+

|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
Reliably retrieve data for various multi-factor asset pricing models.
|
|
43
|
+
|
|
44
|
+
## Models
|
|
45
|
+
|
|
46
|
+
- The 3-factor, 5-factor, and 6-factor models of Fama & French <sup>[[1]](#1) [[3]](#3) [[4]](#4)</sup>
|
|
47
|
+
- Mark Carhart's 4-factor model <sup>[[2]](#2)</sup>
|
|
48
|
+
- Pastor and Stambaugh's liquidity factors <sup>[[5]](#5)</sup>
|
|
49
|
+
- Mispricing factors of Stambaugh and Yuan<sup>[[6]](#6)</sup>
|
|
50
|
+
- The $q$*-factor* model of Hou, Mo, Xue and Zhang<sup>[[7]](#7)</sup>
|
|
51
|
+
- The augmented $q^5$*-factor* model of Hou, Mo, Xue and Zhang<sup>[[8]](#8)</sup>
|
|
52
|
+
- *Intermediary Capital Ratio* (ICR) of He, Kelly & Manela<sup>[[9]](#9)</sup>
|
|
53
|
+
- The *DHS behavioural factors* of Daniel, Hirshleifer & Sun<sup>[[10]](#10)</sup>
|
|
54
|
+
- The *HML* $^{DEVIL}$ factor of Asness & Frazzini<sup>[[11]](#11)</sup>
|
|
55
|
+
- The 6-factor model of Barillas and Shanken<sup>[[12]](#12)</sup>
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
_Thanks to: Kenneth French, Robert Stambaugh, Lin Sun, Zhiguo He, AQR Capital Management (AQR.com) and Hou, Xue and Zhang (global-q.org), for their research and for the datasets they publicly provide._
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
## Installation
|
|
62
|
+
|
|
63
|
+
`getfactormodels` requires Python ``>=3.7``
|
|
64
|
+
|
|
65
|
+
* Install with pip:
|
|
66
|
+
```shell
|
|
67
|
+
$ pip install getfactormodels
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Usage
|
|
71
|
+
|
|
72
|
+
#### Python
|
|
73
|
+
|
|
74
|
+
After installing, import ``getfactormodels`` and call ``get_factors()`` with the ``model`` and ``frequency`` parameters. Optionally, specify a ``start_date`` and ``end_date``
|
|
75
|
+
* For example, to retrieve the daily q-factor model data:
|
|
76
|
+
|
|
77
|
+
```py
|
|
78
|
+
import getfactormodels as getfactormodels
|
|
79
|
+
|
|
80
|
+
df = getfactormodels.get_factors(model='q', frequency='d')
|
|
81
|
+
```
|
|
82
|
+
> _Trimmed output:_
|
|
83
|
+
```txt
|
|
84
|
+
> df
|
|
85
|
+
Mkt-RF R_ME R_IA R_ROE R_EG RF
|
|
86
|
+
date
|
|
87
|
+
1967-01-03 0.000778 0.004944 0.001437 -0.007118 -0.008563 0.000187
|
|
88
|
+
1967-01-04 0.001667 -0.003487 -0.000631 -0.002044 -0.000295 0.000187
|
|
89
|
+
1967-01-05 0.012990 0.004412 -0.005688 0.000838 -0.003075 0.000187
|
|
90
|
+
1967-01-06 0.007230 0.006669 0.008897 0.003603 0.002669 0.000187
|
|
91
|
+
1967-01-09 0.008439 0.006315 0.000331 0.004949 0.002979 0.000187
|
|
92
|
+
... ... ... ... ... ... ...
|
|
93
|
+
2022-12-23 0.005113 -0.001045 0.004000 0.010484 0.003852 0.000161
|
|
94
|
+
2022-12-27 -0.005076 -0.001407 0.010190 0.009206 0.003908 0.000161
|
|
95
|
+
2022-12-28 -0.012344 -0.004354 0.000133 -0.010457 -0.004953 0.000161
|
|
96
|
+
2022-12-29 0.018699 0.008568 -0.008801 -0.012686 -0.002162 0.000161
|
|
97
|
+
2022-12-30 -0.002169 0.001840 0.001011 -0.004151 -0.003282 0.000161
|
|
98
|
+
|
|
99
|
+
[14096 rows x 6 columns]
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
* or, retrieve the monthly liquidity factors of Pastor and Stambaugh for the 1990s:
|
|
103
|
+
|
|
104
|
+
```py
|
|
105
|
+
import getfactormodels as getfactormodels
|
|
106
|
+
|
|
107
|
+
df = getfactormodels.get_factors(model='liquidity', frequency='m', start_date='1990-01-01', end_date='1999-12-31')
|
|
108
|
+
```
|
|
109
|
+
> If you don't have time to type `liquidity`, type `liq`, or `ps`--there's a handy regex.
|
|
110
|
+
|
|
111
|
+
* or, saving the monthly 3-factor model of Fama & French to a file:
|
|
112
|
+
|
|
113
|
+
```py
|
|
114
|
+
import getfactormodels as gfm
|
|
115
|
+
|
|
116
|
+
df = gfm.get_factors(model='ff3', frequency='m', output="ff3_data.csv")
|
|
117
|
+
```
|
|
118
|
+
>The output parameter accepts a filename, path or directory, and can be one of csv, md, txt, xlsx, pkl.
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
You can also import just the models that you need.
|
|
122
|
+
|
|
123
|
+
* For example, to import only the *ICR* and *q*-factor models:
|
|
124
|
+
|
|
125
|
+
```py
|
|
126
|
+
from getfactormodels import icr_factors, q_factors
|
|
127
|
+
|
|
128
|
+
# Passing a model function with no params defaults to monthly.
|
|
129
|
+
df = icr_factors()
|
|
130
|
+
|
|
131
|
+
# The 'q' models, and the 3-factor model of Fama-French also have weekly data.
|
|
132
|
+
df = q_factors(frequency="W", start_date="1992-01-01")
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
* If using ``ff_factors()``, then an additional ``model`` parameter should be specified:
|
|
136
|
+
|
|
137
|
+
```py
|
|
138
|
+
from getfactormodels import ff_factors
|
|
139
|
+
|
|
140
|
+
# To get annual data for the 5-factor model:
|
|
141
|
+
data = ff_factors(model="5", frequency="Y", output=".xlsx")
|
|
142
|
+
|
|
143
|
+
# Daily 3-factor model data, since 1970 (not specifying an end date
|
|
144
|
+
# will return data up until today):
|
|
145
|
+
data = ff_factors(model="3", frequency="D", start_date="1970-01-01")
|
|
146
|
+
```
|
|
147
|
+
> Output allows just an extension to be specified.
|
|
148
|
+
|
|
149
|
+
* or import all the models:
|
|
150
|
+
|
|
151
|
+
```py
|
|
152
|
+
from getfactormodels import models
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
* There's also the `FactorExtractor` class that the CLI uses (it doesn't really do a whole lot yet):
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
from getfactormodels import FactorExtractor
|
|
159
|
+
|
|
160
|
+
fe = FactorExtractor(model='carhart', frequency='m', start_date='1980-01-01', end_date='1980-05-01')
|
|
161
|
+
fe.get_factors()
|
|
162
|
+
fe.to_file('carhart_factors.md')
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
* _The resulting ``carhart_factors.md`` file will look like this:_
|
|
166
|
+
|
|
167
|
+
| date | Mkt-RF | SMB | HML | MOM | RF |
|
|
168
|
+
|:--------------------|---------:|--------:|--------:|--------:|-------:|
|
|
169
|
+
| 1980-01-31 00:00:00 | 0.0551 | 0.0162 | 0.0175 | 0.0755 | 0.008 |
|
|
170
|
+
| 1980-02-29 00:00:00 | -0.0122 | -0.0185 | 0.0061 | 0.0788 | 0.0089 |
|
|
171
|
+
| 1980-03-31 00:00:00 | -0.129 | -0.0664 | -0.0101 | -0.0955 | 0.0121 |
|
|
172
|
+
| 1980-04-30 00:00:00 | 0.0397 | 0.0105 | 0.0106 | -0.0043 | 0.0126 |
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
#### Using the CLI
|
|
176
|
+
* You can also use getfactormodels from the command line.
|
|
177
|
+
|
|
178
|
+
```bash
|
|
179
|
+
$ getfactormodels -h
|
|
180
|
+
|
|
181
|
+
usage: getfactormodels [-h] -m MODEL [-f FREQ] [-s START] [-e END] [-o OUTPUT] [--no_rf]
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
* An example of how to use the CLI to retrieve the Fama-French 3-factor model data:
|
|
185
|
+
```bash
|
|
186
|
+
getfactormodels --model ff3 --frequency M --start 1960-01-01 --end 2020-12-31 --output "filename.csv"
|
|
187
|
+
```
|
|
188
|
+
> Accepted file extensions are .csv, .txt, .xlsx, .pkl and .md. If no extension is given, the output file will be .csv. The --output flag allows a filename, filepath or a directory. If only an extension is provided (including the leading `.`; without it the value is treated as a filename), a name will be generated.
|
|
189
|
+
|
|
190
|
+
* Here's another example that retrieves the annual Fama-French 5-factor data without the RF column:
|
|
191
|
+
|
|
192
|
+
```sh
|
|
193
|
+
getfactormodels -m 5 -f Y -s 1960-01-01 -e 2020-12-31 --no_rf -o ~/some_dir/filename.xlsx
|
|
194
|
+
```
|
|
195
|
+
> `--no_rf` will return the factor model without an RF column.
|
|
196
|
+
|
|
197
|
+
## References
|
|
198
|
+
1. <a id="1"></a> E. F. Fama and K. R. French, ‘Common risk factors in the returns on stocks and bonds’, *Journal of Financial Economics*, vol. 33, no. 1, pp. 3–56, 1993. [PDF](https://people.duke.edu/~charvey/Teaching/BA453_2006/FF_Common_risk.pdf)
|
|
199
|
+
2. <a id="2"></a> M. Carhart, ‘On Persistence in Mutual Fund Performance’, *Journal of Finance*, vol. 52, no. 1, pp. 57–82, 1997. [PDF](https://onlinelibrary.wiley.com/doi/full/10.1111/j.1540-6261.1997.tb03808.x)
|
|
200
|
+
3. <a id="3"></a> E. F. Fama and K. R. French, ‘A five-factor asset pricing model’, *Journal of Financial Economics*, vol. 116, no. 1, pp. 1–22, 2015. [PDF](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2287202)
|
|
201
|
+
4. <a id="4"></a> E. F. Fama and K. R. French, ‘Choosing factors’, *Journal of Financial Economics*, vol. 128, no. 2, pp. 234–252, 2018. [PDF](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2668236)
|
|
202
|
+
5. <a id="5"></a>L. Pastor and R. Stambaugh, ‘Liquidity Risk and Expected Stock Returns’, *Journal of Political Economy*, vol. 111, no. 3, pp. 642–685, 2003. [PDF](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=279804)
|
|
203
|
+
6. <a id="6"></a>R. F. Stambaugh and Y. Yuan, ‘Mispricing Factors’, *The Review of Financial Studies*, vol. 30, no. 4, pp. 1270–1315, 12 2016. [PDF](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2626701)
|
|
204
|
+
7. <a id="7"></a>K. Hou, H. Mo, C. Xue, and L. Zhang, ‘Which Factors?’, *National Bureau of Economic Research, Inc*, 2014. [PDF](https://academic.oup.com/rof/article/23/1/1/5133564)
|
|
205
|
+
8. <a id="8"></a>K. Hou, H. Mo, C. Xue, and L. Zhang, ‘An Augmented q-Factor Model with Expected Growth*’, *Review of Finance*, vol. 25, no. 1, pp. 1–41, 02 2020. [PDF](https://academic.oup.com/rof/article/25/1/1/5727769)
|
|
206
|
+
9. <a id="9"></a>Z. He, B. Kelly, and A. Manela, ‘Intermediary asset pricing: New evidence from many asset classes’, *Journal of Financial Economics*, vol. 126, no. 1, pp. 1–35, 2017. [PDF](https://cpb-us-w2.wpmucdn.com/voices.uchicago.edu/dist/6/2325/files/2019/12/jfepublishedversion.pdf)
|
|
207
|
+
10. <a id="10"></a>K. Daniel, D. Hirshleifer, and L. Sun, ‘Short- and Long-Horizon Behavioral Factors’, *Review of Financial Studies*, vol. 33, no. 4, pp. 1673–1736, 2020. [PDF](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3086063)
|
|
208
|
+
11. <a id="11"></a>C. Asness and A. Frazzini, ‘The Devil in HML’s Details’, *The Journal of Portfolio Management*, vol. 39, pp. 49–68, 2013. [PDF](https://stockmarketmba.com/docs/Asness_Frazzini_AdjustHML.pdf)
|
|
209
|
+
12. <a id="12"></a>F. Barillas and J. Shanken, ‘Comparing Asset Pricing Models’, *Journal of Finance*, vol. 73, no. 2, pp. 715–754, 2018. [PDF](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2700000)
|
|
210
|
+
|
|
211
|
+
**Data sources:**
|
|
212
|
+
|
|
213
|
+
* K. French, "Data Library," Tuck School of Business at Dartmouth.
|
|
214
|
+
[Link](https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html)
|
|
215
|
+
* R. Stambaugh, "Liquidity" and "Mispricing" factor datasets, Wharton School, University of Pennsylvania.
|
|
216
|
+
[Link](https://finance.wharton.upenn.edu/~stambaug/)
|
|
217
|
+
* Z. He, "Intermediary Capital Ratio and Risk Factor" dataset, University of Chicago.
|
|
218
|
+
[Link](https://voices.uchicago.edu/zhiguohe/data-and-empirical-patterns/intermediary-capital-ratio-and-risk-factor/)
|
|
219
|
+
* K. Hou, C. Xue, L. Zhang, "The Hou-Xue-Zhang q-factors data library," at global-q.org.
|
|
220
|
+
[Link](http://global-q.org/factors.html)
|
|
221
|
+
* AQR Capital Management's Data Sets.
|
|
222
|
+
* Lin Sun, DHS Behavioural factors [Link](https://sites.google.com/view/linsunhome)
|
|
223
|
+
|
|
224
|
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
|
225
|
+
|
|
226
|
+
## License
|
|
227
|
+
|
|
228
|
+

|
|
229
|
+
|
|
230
|
+
*The code in this project is released under the [MIT License]().*
|
|
231
|
+
|
|
232
|
+
[](https://pycqa.github.io/isort/)
|
|
233
|
+
[](https://simpleicons.org/?q=ruff)
|
|
234
|
+
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
getfactormodels/__init__.py,sha256=OBR7bzlxouVp0fnnNiTPEu2Dg5WT7jY-Ss4AXhpPaQ0,1882
|
|
2
|
+
getfactormodels/__main__.py,sha256=wog0NDpyKmUmFWJ0brIJIPoKN_m0HGXtLiXEuO3PrRs,5565
|
|
3
|
+
getfactormodels/models/__init__.py,sha256=TuTNVPCEwd9xxlAHzoEk4sYDenYQcBdCiMbiulOT-Y0,1237
|
|
4
|
+
getfactormodels/models/ff_models.py,sha256=8ek3Q40acyOuILcbiAJvYfPnyPisY4oPapeRf4w9FzI,4492
|
|
5
|
+
getfactormodels/models/models.py,sha256=VdRWssoqKZKojJyzRj0KstXbLex7WfPU5bgN9NblQoE,17884
|
|
6
|
+
getfactormodels/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
getfactormodels/utils/cli.py,sha256=Gtyuph2HCYgnx-qy6Aq4QRgDJzzxQ_bPX5iG463uJkE,1455
|
|
8
|
+
getfactormodels/utils/utils.py,sha256=G-LcqJLWP-VGt3QlLsmQDwd7fbFK4itlNwpeADZRE_s,5559
|
|
9
|
+
getfactormodels-0.0.1.dist-info/entry_points.txt,sha256=BeSOuEFV8LlnhTxpKpbeJLQfl_kS-bVif0k4Z1ghOnY,65
|
|
10
|
+
getfactormodels-0.0.1.dist-info/LICENSE,sha256=3AA29XMl8p-SVQzn1hMvq478uj3FHVjEUknv6YaGeYk,1066
|
|
11
|
+
getfactormodels-0.0.1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
|
12
|
+
getfactormodels-0.0.1.dist-info/METADATA,sha256=KKWEMEIvtRE--mXWonwR8Mwj05zxrKryyRtIwfmzbVg,11895
|
|
13
|
+
getfactormodels-0.0.1.dist-info/RECORD,,
|