siat 3.10.125__py3-none-any.whl → 3.10.127__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- siat/common.py +106 -2
- siat/exchange_bond_china.pickle +0 -0
- siat/fund_china.pickle +0 -0
- siat/stock.py +10 -2
- siat/stock_info.pickle +0 -0
- {siat-3.10.125.dist-info → siat-3.10.127.dist-info}/METADATA +235 -226
- siat-3.10.127.dist-info/RECORD +76 -0
- {siat-3.10.125.dist-info → siat-3.10.127.dist-info}/WHEEL +1 -1
- {siat-3.10.125.dist-info → siat-3.10.127.dist-info/licenses}/LICENSE +0 -0
- {siat-3.10.125.dist-info → siat-3.10.127.dist-info}/top_level.txt +0 -0
- siat/__init__ -20240701.py +0 -65
- siat/__init__.py.backup_20250214.py +0 -73
- siat/alpha_vantage_test.py +0 -24
- siat/assets_liquidity_test.py +0 -44
- siat/barrons_scraping_test.py +0 -276
- siat/beta_adjustment_test.py +0 -77
- siat/bond_test.py +0 -142
- siat/capm_beta_test.py +0 -49
- siat/cmat_commons.py +0 -961
- siat/compare_cross_test.py +0 -117
- siat/concepts_iwencai.py +0 -86
- siat/concepts_kpl.py +0 -93
- siat/cryptocurrency_test.py +0 -71
- siat/derivative.py +0 -1111
- siat/economy-20230125.py +0 -1206
- siat/economy_test.py +0 -360
- siat/esg_test.py +0 -63
- siat/fama_french_test.py +0 -115
- siat/financial_statements_test.py +0 -31
- siat/financials2 - 副本.py +0 -341
- siat/financials_china2_test.py +0 -67
- siat/financials_china2_test2.py +0 -88
- siat/financials_china2_test3.py +0 -87
- siat/financials_china_test.py +0 -475
- siat/financials_china_test2.py +0 -197
- siat/financials_china_test2_fin_indicator.py +0 -197
- siat/financials_test.py +0 -713
- siat/fred_test.py +0 -40
- siat/fund_china_test.py +0 -175
- siat/fund_test.py +0 -40
- siat/future_china_test.py +0 -37
- siat/global_index_test.py +0 -66
- siat/grafix_test.py +0 -112
- siat/holding_risk_test.py +0 -13
- siat/local_debug_test.py +0 -100
- siat/markowitz2-20240620.py +0 -2614
- siat/markowitz_ccb_test.py +0 -37
- siat/markowitz_ef_test.py +0 -136
- siat/markowitz_old.py +0 -871
- siat/markowitz_simple-20230709.py +0 -370
- siat/markowitz_test.py +0 -164
- siat/markowitz_test2.py +0 -69
- siat/ml_cases_example1.py +0 -60
- siat/option_china_test.py +0 -447
- siat/option_pricing_test.py +0 -81
- siat/option_sina_api_test.py +0 -112
- siat/proxy_test.py +0 -84
- siat/quandl_test.py +0 -39
- siat/risk_adjusted_return_test.py +0 -81
- siat/risk_evaluation_test.py +0 -96
- siat/risk_free_rate_test.py +0 -127
- siat/sector_china_test.py +0 -203
- siat/security_price.py +0 -831
- siat/security_prices_test.py +0 -310
- siat/security_trend2-20240620.py +0 -493
- siat/setup.py +0 -41
- siat/shenwan index history test.py +0 -41
- siat/stock_china_test.py +0 -38
- siat/stock_info_test.py +0 -189
- siat/stock_list_china_test.py +0 -33
- siat/stock_technical-20240620.py +0 -2736
- siat/stock_test.py +0 -487
- siat/temp.py +0 -36
- siat/test2_graphviz.py +0 -484
- siat/test_graphviz.py +0 -411
- siat/test_markowitz_simple.py +0 -198
- siat/test_markowitz_simple_revised.py +0 -215
- siat/test_markowitz_simple_revised2.py +0 -218
- siat/transaction_test.py +0 -436
- siat/translate-20230125.py +0 -2107
- siat/translate-20230206.py +0 -2109
- siat/translate-20230215.py +0 -2158
- siat/translate_20240606.py +0 -4206
- siat/translate_241003_keep.py +0 -4300
- siat/universal_test.py +0 -100
- siat/valuation_market_china_test.py +0 -36
- siat-3.10.125.dist-info/RECORD +0 -152
siat/cmat_commons.py
DELETED
@@ -1,961 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
"""
|
3
|
-
版权:王德宏,北京外国语大学国际商学院
|
4
|
-
功能:提供CMAT资本市场与投资管理分析工具包的精选公共函数,便于各个插件直接引用或复制
|
5
|
-
版本:1.10,2019-10-9
|
6
|
-
"""
|
7
|
-
|
8
|
-
#==============================================================================
|
9
|
-
#屏蔽所有警告性信息
|
10
|
-
import warnings; warnings.filterwarnings('ignore')
|
11
|
-
#==============================================================================
|
12
|
-
#以下使用雅虎财经数据源
|
13
|
-
#==============================================================================
|
14
|
-
def check_period(fromdate,todate):
    """
    Validate a start/end date pair and convert both dates with pandas.

    Parameters:
        fromdate: start date, in any form pandas.to_datetime accepts
                  (e.g. 'YYYY-MM-DD').
        todate: end date, same accepted forms as fromdate.

    Returns:
        (True, start, end) when both dates parse and start <= end, where
        start and end are the values returned by pandas.to_datetime.
        (None, None, None) otherwise, after printing an error message.
    """
    import pandas as pd

    try:
        start=pd.to_datetime(fromdate)
    except Exception:
        print("Error #1(check_period): invalid date:",fromdate)
        return None,None,None
    # to_datetime does not raise for missing input (None, '', NaN): it hands
    # back None/NaT, which would otherwise slip through the comparison below.
    if start is None or start is pd.NaT:
        print("Error #1(check_period): invalid date:",fromdate)
        return None,None,None

    try:
        end=pd.to_datetime(todate)
    except Exception:
        print("Error #2(check_period): invalid date:",todate)
        return None,None,None
    if end is None or end is pd.NaT:
        print("Error #2(check_period): invalid date:",todate)
        return None,None,None

    if start > end:
        print("Error #3(check_period): invalid period: from",fromdate,"to",todate)
        return None,None,None

    return True,start,end
|
41
|
-
|
42
|
-
#==============================================================================
|
43
|
-
def get_prices_yahoo(ticker,fromdate,todate):
    """
    Download raw prices from Yahoo Finance through pandas_datareader.

    Parameters:
        ticker: a stock code or a list of stock codes. Per the original
                author's note, mainland China codes carry a .SZ/.SS suffix
                and Hong Kong codes drop the leading 0 and add .HK.
        fromdate: first date of the sample.
        todate: last date of the sample (today or a historical date).

    Returns:
        The frame returned by DataReader, restricted to [fromdate, todate]
        and sorted by date ascending; None (after printing a diagnostic)
        when the period is invalid, the download fails or no rows come back.

    Author's note on the data: "Close" is adjusted for splits, "Adj Close"
    for both dividends and splits.
    """
    # Validate the period and obtain parsed dates
    result,start,end=check_period(fromdate,todate)
    if result is None:
        print("Error #1(get_prices_yahoo): incorrect date or invalid period!")
        return None

    from pandas_datareader import data
    try:
        prices=data.DataReader(ticker,'yahoo',start,end)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate
        print("Error #2(get_prices_yahoo): failed to get stock prices!")
        print("Information:",ticker,fromdate,todate)
        print("Possible reasons:")
        print(" 1)internet connection problems.")
        print(" 2)incorrect stock code.")
        print(" 3)stock delisted or suspended during the period.")
        return None
    if len(prices)==0:
        print("Error #3(get_prices_yahoo): fetched empty stock data!")
        print("Possible reasons:")
        print(" 1)internet connection problems.")
        print(" 2)incorrect stock code.")
        print(" 3)stock delisted or suspended during the period.")
        return None

    # Drop rows earlier than the start date, then rows later than the end date
    price2=prices[prices.index >= start]
    price2=price2[price2.index <= end]

    # Oldest first, most recent last
    sortedprice=price2.sort_index(axis=0,ascending=True)

    return sortedprice
|
91
|
-
|
92
|
-
|
93
|
-
#==============================================================================
|
94
|
-
def cvt_yftickerlist(ticker):
    """
    Convert a pandas_datareader style ticker argument to the yfinance form.

    Parameters:
        ticker: a single stock code, or a list of stock codes.

    Returns:
        (value, is_list):
          - non-list input: (ticker, False), unchanged;
          - one-element list: (that element, False);
          - longer list: (codes joined by single spaces, True).
    """
    # Anything that is not a list is handed back untouched
    if not isinstance(ticker,list):
        return ticker,False

    # Indexing first keeps the IndexError an empty list has always produced
    head=ticker[0]
    if len(ticker)==1:
        return head,False

    # Two or more codes: yfinance takes them as one space-separated string
    return ' '.join([head]+ticker[1:]),True
|
113
|
-
|
114
|
-
|
115
|
-
# Manual smoke test, executed only when the module is run as a script
if __name__=='__main__':
    # Walk through the scalar, one-element list and multi-element list paths
    tl1,islist=cvt_yftickerlist('AAPL')
    tl1,islist=cvt_yftickerlist(['AAPL'])
    tl1,islist=cvt_yftickerlist(['AAPL','MSFT'])
    tl1,islist=cvt_yftickerlist(['AAPL','MSFT','0700.hk'])
    # Only the result of the last call is shown
    print(tl1)
|
121
|
-
|
122
|
-
#==============================================================================
|
123
|
-
def get_prices_yf(ticker,start,end):
    """
    Download prices from Yahoo Finance through yfinance.

    The original author notes yfinance is fast for non-US stocks but
    occasionally unstable.

    Parameters:
        ticker: a stock code or a list of stock codes (.SZ/.SS suffix for
                mainland China, leading 0 removed plus .HK for Hong Kong,
                per the original author's note).
        start: first date of the sample, e.g. 'yyyy-mm-dd'.
        end: last date of the sample (today or a historical date).

    Returns:
        The downloaded frame restricted to [start, end] and sorted by date
        ascending (most recent row last); None, after printing a message,
        when the period is invalid, the request raises or no rows come back.
    """
    # Parse the dates; the parsed values are used for the final filtering
    r,startdate,enddate=check_period(start,end)
    if r is None:
        print("Error #1(get_prices_yf): invalid time period")
        return None

    import yfinance as yf
    try:
        ticker1,islist=cvt_yftickerlist(ticker)
        if not islist:
            # Single code: go through a Ticker object
            stock=yf.Ticker(ticker1)
            p=stock.history(start=start,end=end)
        else:
            # Several codes: bulk download of the space-separated list
            p=yf.download(ticker1,start=start,end=end,progress=False)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate
        print("Error #1(get_prices_yf): server not responsed!")
        return None

    if len(p) == 0:
        print("Error #2(get_prices_yf): server reached but returned no data!")
        return None

    # Drop rows earlier than the start date, then rows later than the end date
    price=p[p.index >= startdate]
    price2=price[price.index <= enddate]

    # Oldest first, most recent last
    sortedprice=price2.sort_index(axis=0,ascending=True)

    return sortedprice
|
178
|
-
|
179
|
-
# Manual smoke test, executed only when the module is run as a script
if __name__=='__main__':
    # Single code, one-element list, and two/three-element lists
    df1=get_prices_yf('AAPL','2019-10-1','2019-10-8')
    df2=get_prices_yf(['AAPL'],'2019-10-1','2019-10-8')
    df3=get_prices_yf(['AAPL','MSFT'],'2019-10-1','2019-10-8')
    df4=get_prices_yf(['AAPL','MSFT','IBM'],'2019-10-1','2019-10-8')
|
184
|
-
|
185
|
-
#==============================================================================
|
186
|
-
def get_stock_prices(ticker,fromdate,todate):
    """
    Fetch Yahoo Finance prices and tag them with code, date and weekday.

    Parameters:
        ticker: a stock/index code, or a list of codes (.SZ/.SS suffix for
                mainland China, leading 0 removed plus .HK for Hong Kong,
                per the original author's note).
        fromdate: first date of the sample.
        todate: last date of the sample (today or a historical date).

    Returns:
        A frame sorted by date ascending with the columns Stock, Date
        ('YYYY-MM-DD' text), Weekday (Monday=1), Open, Close, Adj Close and
        Volume. For a single code an extra boolean column Adjustment is True
        where Close differs from Adj Close. None when the download fails.

    Author's note on the data: "Close" is adjusted for splits, "Adj Close"
    for both dividends and splits.
    """
    prices=get_prices_yahoo(ticker,fromdate,todate)
    if prices is None:
        print("Error #2(get_stock_prices): failed to get stock prices!")
        return None

    # Date as text plus weekday number (index.weekday is 0-based)
    prices['Date']=prices.index.strftime("%Y-%m-%d")
    prices['Weekday']=prices.index.weekday+1

    # Decide list-vs-single from the argument type itself. Detecting it from a
    # failed column assignment misfires when the list happens to have as many
    # entries as there are rows.
    stocklist=isinstance(ticker,(list,tuple))
    prices['Stock']=str(ticker) if stocklist else ticker

    if stocklist:
        # No adjustment flag for a list of codes
        return prices[['Stock','Date','Weekday',
                       'Open','Close','Adj Close','Volume']]

    # Flag rows whose close was adjusted (differs from the adjusted close)
    prices['Adjustment']=prices['Close']!=prices['Adj Close']
    stockdf=prices[['Stock','Date','Weekday',
                    'Open','Close','Adj Close','Volume','Adjustment']]
    return stockdf
|
233
|
-
|
234
|
-
|
235
|
-
# Manual smoke test, executed only when the module is run as a script
if __name__=='__main__':
    df1=get_stock_prices('601857.SS','2012-01-01','2019-12-31')
    df2=get_stock_prices('MSFT','01/01/2015','06/30/2019')
    # NOTE(review): get_stock_prices writes Date with the "%Y-%m-%d" format, so
    # these MM/DD/YYYY literals look like they cannot match any row - confirm.
    df2[df2.Date == '06/28/2019']
    df2[(df2.Date>='03/20/2019') & (df2.Date<='03/29/2019')]
    # An index, two mainland China codes and a Hong Kong code
    df3=get_stock_prices('^GSPC','1/1/2015','6/30/2019')
    df4=get_stock_prices('002504.SZ','01/01/2015','06/30/2019')
    df5=get_stock_prices('000001.SS','01/01/2015','07/16/2019')
    df6=get_stock_prices('0700.HK','01/01/2015','06/30/2019')
|
244
|
-
|
245
|
-
#==============================================================================
|
246
|
-
def get_portfolio_prices(tickerlist,sharelist,fromdate,todate):
    """
    Compute the daily value of a portfolio from Yahoo Finance prices.

    Parameters:
        tickerlist: list of stock codes.
        sharelist: number of shares held, one entry per code in tickerlist.
        fromdate: first date of the sample, e.g. 'YYYY-MM-DD'.
        todate: last date of the sample (today or a historical date).

    Returns:
        A frame with the columns Portfolio, Shares, Date, Weekday (Monday=1),
        Open, Close, Adj Close and Adjustment, where each price column is the
        share-weighted sum across the holdings. None, after printing a
        message, when the two lists differ in length or the download fails.
    """
    # Every holding needs exactly one share count
    if len(tickerlist) != len(sharelist):
        print("Error #1(get_portfolio_prices): numbers of stocks and shares mismatch.")
        return None

    p=get_prices_yahoo(tickerlist,fromdate,todate)
    if p is None:
        # The download helper already printed the reason; do not subscript None
        print("Error #2(get_portfolio_prices): failed to get portfolio prices!")
        return None

    import pandas as pd

    # One single-column frame per price field: prices dot share counts
    valued=[]
    for field in ('Open','Close','Adj Close'):
        value=pd.DataFrame(p[field].dot(sharelist))
        value.rename(columns={0: field}, inplace=True)
        valued.append(value)

    # Combine the fields side by side on the date index
    prices=valued[0]
    for extra in valued[1:]:
        prices=pd.merge(prices,extra,how='inner',left_index=True,right_index=True)

    # Date as text plus weekday number (index.weekday is 0-based)
    prices['Date']=prices.index.strftime("%Y-%m-%d")
    prices['Weekday']=prices.index.weekday+1

    prices['Portfolio']=str(tickerlist)
    prices['Shares']=str(sharelist)
    # True where the portfolio close differs from its adjusted close
    prices['Adjustment']=prices['Close']!=prices['Adj Close']

    stockdf=prices[['Portfolio','Shares','Date','Weekday',
                    'Open','Close','Adj Close','Adjustment']]

    return stockdf
|
309
|
-
|
310
|
-
|
311
|
-
#==============================================================================
|
312
|
-
#以下专门处理tushare数据源
|
313
|
-
#==============================================================================
|
314
|
-
def convert_date_ts(y4m2d2):
    """
    Convert a date to the compact 'YYYYMMDD' text used for tushare calls.

    Parameters:
        y4m2d2: a date in any form pandas.to_datetime accepts,
                e.g. 'YYYY-MM-DD'.

    Returns:
        The date as an 8-character 'YYYYMMDD' string, or None (after printing
        an error message) when the input cannot be read as a date.
    """
    import pandas as pd

    try:
        date1=pd.to_datetime(y4m2d2)
    except Exception:
        print("Error #1(convert_date_ts): invalid date:",y4m2d2)
        return None

    # Missing input (None, '', NaN) parses to None/NaT without raising and
    # cannot be formatted
    if date1 is None or date1 is pd.NaT:
        print("Error #1(convert_date_ts): invalid date:",y4m2d2)
        return None

    return date1.strftime('%Y%m%d')
|
328
|
-
|
329
|
-
# Manual smoke test, executed only when the module is run as a script
if __name__ == '__main__':
    # Slash-separated input without zero padding
    convert_date_ts("2019/11/1")
|
331
|
-
|
332
|
-
#==============================================================================
|
333
|
-
def init_ts():
    """
    Create the tushare pro API client used for downloads.

    The token is read from the TUSHARE_TOKEN environment variable when it is
    set and non-empty; otherwise the token shipped with the module is used,
    so existing callers keep working unchanged.

    Returns:
        The object returned by tushare.pro_api(token).
    """
    import os
    import tushare as ts

    # NOTE(review): a credential embedded in published source is visible to
    # everyone who installs the package - prefer TUSHARE_TOKEN and consider
    # rotating this value.
    token=os.environ.get('TUSHARE_TOKEN') or \
        '49f134b05e668d288be43264639ac77821ab9938ff40d6013c0ed24f'
    pro=ts.pro_api(token)

    return pro
|
343
|
-
#==============================================================================
|
344
|
-
def get_stock_prices_ts(ticker,fromdate,todate):
    """
    Fetch daily prices of one mainland China stock or index from tushare.

    Unlike the Yahoo helpers this handles a single code only, not a list.

    Parameters:
        ticker: stock or index code with a .SZ/.SH suffix; a Yahoo style .SS
                suffix is rewritten to .SH.
        fromdate: first date of the sample.
        todate: last date of the sample (today or a historical date).

    Returns:
        A frame indexed by date (index name 'DateIndex'), sorted ascending,
        with the columns Stock, Date ('YYYY-MM-DD' text), Weekday (Monday=1),
        Open, Close and Volume (tushare 'vol' multiplied by 100). None, after
        printing a message, when the period is invalid or nothing could be
        downloaded.
    """
    import pandas as pd

    # Validate the period; the parsed dates drive the final filtering
    result,start,end=check_period(fromdate,todate)
    if result is None:
        print("Error #1(get_stock_prices_ts): invalid date period!")
        return None

    # tushare takes YYYYMMDD text. These live in their own names so the
    # parsed timestamps above are not overwritten by strings.
    start_ts=convert_date_ts(fromdate)
    end_ts=convert_date_ts(todate)

    # tushare uses .SH where Yahoo uses .SS
    ticker2=ticker.upper().replace('.SS','.SH')

    pro=init_ts()
    try:
        prices=pro.daily(ts_code=ticker2,start_date=start_ts,end_date=end_ts)
    except Exception:
        print("Error #2(get_stock_prices_ts): failed to get stock prices!")
        print("Information:",ticker2,fromdate,todate)
        print("Possible reasons:")
        print(" 1)internet connection problems.")
        print(" 2)incorrect stock code.")
        print(" 3)stock delisted or suspended during the period.")
        return None

    # No error but no rows either: the code may be an index, so retry there
    if len(prices)==0:
        try:
            prices=pro.index_daily(ts_code=ticker2,start_date=start_ts,end_date=end_ts)
        except Exception:
            print("Error #3(get_stock_prices_ts): failed to get index prices!")
            print("Information:",ticker2,fromdate,todate)
            print("Possible reasons:")
            print(" 1)internet connection problems.")
            print(" 2)data source server busy.")
            return None
        if len(prices)==0:
            print("Error #4(get_stock_prices_ts): fetched empty index data!")
            print("Information:",ticker2,fromdate,todate)
            return None

    # Rename to the column names used by the Yahoo helpers
    prices.rename(columns={'ts_code':'Stock','open':'Open','high':'High',
                           'low':'Low','close':'Close',
                           'amount':'Amount'}, inplace = True)

    # trade_date is YYYYMMDD text; rewrite it as YYYY-MM-DD
    raw=prices['trade_date']
    prices['Date']=raw.str[0:4]+'-'+raw.str[4:6]+'-'+raw.str[6:8]

    # Per the original author's note, vol is in lots of 100 shares
    prices['Volume']=prices['vol']*100

    # Index the rows by the parsed trading date
    prices['DateIndex']=pd.to_datetime(prices['Date'])
    prices.set_index('DateIndex',inplace=True)
    prices['Weekday']=prices.index.weekday+1

    # Oldest first, most recent last
    price2=prices.sort_index(axis=0,ascending=True)

    # Keep only [fromdate, todate], compared at whole-day granularity as the
    # YYYYMMDD bounds were
    first_day=start.normalize()
    last_day=end.normalize()
    price2=price2[price2.index >= first_day]
    price2=price2[price2.index <= last_day]

    stockdf=price2[['Stock','Date','Weekday','Open','Close','Volume']]

    return stockdf
|
435
|
-
|
436
|
-
|
437
|
-
#==============================================================================
|
438
|
-
def get_portfolio_prices_ts(tickerlist,sharelist,fromdate,todate):
    """
    Compute the daily value of a portfolio from tushare prices.

    Parameters:
        tickerlist: list of mainland China stock codes.
        sharelist: number of shares held, one entry per code in tickerlist.
        fromdate: first date of the sample, e.g. 'YYYY-MM-DD'.
        todate: last date of the sample (today or a historical date).

    Returns:
        A frame with the columns Portfolio, Shares, Date, Weekday (Monday=1),
        Open and Close, restricted to dates present for every holding, where
        Open/Close are share-weighted sums. None, after printing a message,
        when the lists differ in length, the portfolio is empty or any
        holding cannot be downloaded.
    """
    # Every holding needs exactly one share count
    if len(tickerlist) != len(sharelist):
        print("Error #1(get_portfolio_prices_ts): numbers of stocks and shares mismatch.")
        return None
    if len(tickerlist)==0:
        print("Error #3(get_portfolio_prices_ts): empty portfolio.")
        return None

    import pandas as pd

    # Download each holding; keep its open and close as one-column frames
    # named after the code, in portfolio order
    open_frames=[]
    close_frames=[]
    for t in tickerlist:
        p=get_stock_prices_ts(t,fromdate,todate)
        if p is None:
            print("Error #2(get_portfolio_prices_ts): fetch stock prices failed.")
            print("Information:",t,fromdate,todate)
            return None
        open_frames.append(p[['Open']].rename(columns={'Open':t}))
        close_frames.append(p[['Close']].rename(columns={'Close':t}))

    # Join the holdings side by side on the date index. Seeding by position
    # (not by comparing codes) keeps a repeated first ticker from wiping the
    # columns merged so far.
    p_open3=open_frames[0]
    p_close3=close_frames[0]
    for next_open,next_close in zip(open_frames[1:],close_frames[1:]):
        p_open3=pd.merge(p_open3,next_open,how='inner',
                         left_index=True,right_index=True)
        p_close3=pd.merge(p_close3,next_close,how='inner',
                          left_index=True,right_index=True)

    # Portfolio open and close: prices dot share counts
    oprice=pd.DataFrame(p_open3.dot(sharelist))
    oprice.rename(columns={0:'Open'},inplace=True)
    cprice=pd.DataFrame(p_close3.dot(sharelist))
    cprice.rename(columns={0:'Close'},inplace=True)

    prices=pd.merge(oprice,cprice,how='inner',left_index=True,right_index=True)

    # Date as text plus weekday number (index.weekday is 0-based)
    prices['Date']=prices.index.strftime("%Y-%m-%d")
    prices['Weekday']=prices.index.weekday+1

    prices['Portfolio']=str(tickerlist)
    prices['Shares']=str(sharelist)

    stockdf=prices[['Portfolio','Shares','Date','Weekday','Open','Close']]

    return stockdf
|
515
|
-
|
516
|
-
#==============================================================================
|
517
|
-
#以下专门处理fama_french因子数据源
|
518
|
-
#==============================================================================
|
519
|
-
def get_ff_factors(start,end,scope='US',factor='FF3',freq='daily'):
    """
    Download a Fama-French factor data set through pandas_datareader.

    Parameters:
        start: first date of the sample, passed to DataReader as is.
        end: last date of the sample, passed to DataReader as is.
        scope: market, one of 'US', 'Global', 'Global_ex_US', 'Europe',
               'Japan', 'Asia_Pacific_ex_Japan', 'North_America'; 'China' is
               mapped to 'Asia_Pacific_ex_Japan'.
        factor: 'FF3', 'FF5' or 'Mom' (plus 'ST_Rev' and 'LT_Rev' for 'US').
        freq: 'daily', 'monthly' or 'yearly' ('weekly' for US FF3 only).

    Returns:
        The entry of the DataReader result selected for the combination
        (entry 1 for 'yearly', entry 0 otherwise); None, after printing a
        message, when the combination is unknown, the download raises or the
        selected entry is empty.
    """
    # (scope, factor, freq) -> (famafrench data set name, entry to pick)
    datasets={
        ('US','FF3','monthly'):('F-F_Research_Data_Factors',0),
        ('US','FF3','yearly'):('F-F_Research_Data_Factors',1),
        ('US','FF3','weekly'):('F-F_Research_Data_Factors_weekly',0),
        ('US','FF3','daily'):('F-F_Research_Data_Factors_daily',0),
        ('US','FF5','monthly'):('F-F_Research_Data_5_Factors_2x3',0),
        ('US','FF5','yearly'):('F-F_Research_Data_5_Factors_2x3',1),
        ('US','FF5','daily'):('F-F_Research_Data_5_Factors_2x3_daily',0),
        ('US','Mom','monthly'):('F-F_Momentum_Factor',0),
        ('US','Mom','yearly'):('F-F_Momentum_Factor',1),
        ('US','Mom','daily'):('F-F_Momentum_Factor_daily',0),
        ('US','ST_Rev','monthly'):('F-F_ST_Reversal_Factor',0),
        ('US','ST_Rev','yearly'):('F-F_ST_Reversal_Factor',1),
        ('US','ST_Rev','daily'):('F-F_ST_Reversal_Factor_daily',0),
        ('US','LT_Rev','monthly'):('F-F_LT_Reversal_Factor',0),
        ('US','LT_Rev','yearly'):('F-F_LT_Reversal_Factor',1),
        ('US','LT_Rev','daily'):('F-F_LT_Reversal_Factor_daily',0),
        ('Global','FF3','monthly'):('Global_3_Factors',0),
        ('Global','FF3','yearly'):('Global_3_Factors',1),
        ('Global','FF3','daily'):('Global_3_Factors_Daily',0),
        ('Global_ex_US','FF3','monthly'):('Global_ex_US_3_Factors',0),
        ('Global_ex_US','FF3','yearly'):('Global_ex_US_3_Factors',1),
        ('Global_ex_US','FF3','daily'):('Global_ex_US_3_Factors_Daily',0),
        ('Europe','FF3','monthly'):('Europe_3_Factors',0),
        ('Europe','FF3','yearly'):('Europe_3_Factors',1),
        ('Europe','FF3','daily'):('Europe_3_Factors_Daily',0),
        ('Japan','FF3','monthly'):('Japan_3_Factors',0),
        ('Japan','FF3','yearly'):('Japan_3_Factors',1),
        ('Japan','FF3','daily'):('Japan_3_Factors_Daily',0),
        ('Asia_Pacific_ex_Japan','FF3','monthly'):('Asia_Pacific_ex_Japan_3_Factors',0),
        ('Asia_Pacific_ex_Japan','FF3','yearly'):('Asia_Pacific_ex_Japan_3_Factors',1),
        ('Asia_Pacific_ex_Japan','FF3','daily'):('Asia_Pacific_ex_Japan_3_Factors_Daily',0),
        ('North_America','FF3','monthly'):('North_America_3_Factors',0),
        ('North_America','FF3','yearly'):('North_America_3_Factors',1),
        ('North_America','FF3','daily'):('North_America_3_Factors_Daily',0),
        ('Global','FF5','monthly'):('Global_5_Factors',0),
        ('Global','FF5','yearly'):('Global_5_Factors',1),
        ('Global','FF5','daily'):('Global_5_Factors_Daily',0),
        ('Global_ex_US','FF5','monthly'):('Global_ex_US_5_Factors',0),
        ('Global_ex_US','FF5','yearly'):('Global_ex_US_5_Factors',1),
        ('Global_ex_US','FF5','daily'):('Global_ex_US_5_Factors_Daily',0),
        ('Europe','FF5','monthly'):('Europe_5_Factors',0),
        ('Europe','FF5','yearly'):('Europe_5_Factors',1),
        ('Europe','FF5','daily'):('Europe_5_Factors_Daily',0),
        ('Japan','FF5','monthly'):('Japan_5_Factors',0),
        ('Japan','FF5','yearly'):('Japan_5_Factors',1),
        ('Japan','FF5','daily'):('Japan_5_Factors_Daily',0),
        ('Asia_Pacific_ex_Japan','FF5','monthly'):('Asia_Pacific_ex_Japan_5_Factors',0),
        ('Asia_Pacific_ex_Japan','FF5','yearly'):('Asia_Pacific_ex_Japan_5_Factors',1),
        ('Asia_Pacific_ex_Japan','FF5','daily'):('Asia_Pacific_ex_Japan_5_Factors_Daily',0),
        ('North_America','FF5','monthly'):('North_America_5_Factors',0),
        ('North_America','FF5','yearly'):('North_America_5_Factors',1),
        ('North_America','FF5','daily'):('North_America_5_Factors_Daily',0),
        ('Global','Mom','monthly'):('Global_Mom_Factor',0),
        ('Global','Mom','yearly'):('Global_Mom_Factor',1),
        ('Global','Mom','daily'):('Global_Mom_Factor_Daily',0),
        ('Global_ex_US','Mom','monthly'):('Global_ex_US_Mom_Factor',0),
        ('Global_ex_US','Mom','yearly'):('Global_ex_US_Mom_Factor',1),
        ('Global_ex_US','Mom','daily'):('Global_ex_US_Mom_Factor_Daily',0),
        ('Europe','Mom','monthly'):('Europe_Mom_Factor',0),
        ('Europe','Mom','yearly'):('Europe_Mom_Factor',1),
        ('Europe','Mom','daily'):('Europe_Mom_Factor_Daily',0),
        ('Japan','Mom','monthly'):('Japan_Mom_Factor',0),
        ('Japan','Mom','yearly'):('Japan_Mom_Factor',1),
        ('Japan','Mom','daily'):('Japan_Mom_Factor_Daily',0),
        ('Asia_Pacific_ex_Japan','Mom','monthly'):('Asia_Pacific_ex_Japan_MOM_Factor',0),
        ('Asia_Pacific_ex_Japan','Mom','yearly'):('Asia_Pacific_ex_Japan_MOM_Factor',1),
        ('Asia_Pacific_ex_Japan','Mom','daily'):('Asia_Pacific_ex_Japan_MOM_Factor_Daily',0),
        ('North_America','Mom','monthly'):('North_America_Mom_Factor',0),
        ('North_America','Mom','yearly'):('North_America_Mom_Factor',1),
        ('North_America','Mom','daily'):('North_America_Mom_Factor_Daily',0),
    }

    # Data source name understood by pandas_datareader
    source='famafrench'
    # No China-specific set in the table: fall back to the regional one
    if scope == "China": scope="Asia_Pacific_ex_Japan"

    # Look up the data set for scope+factor+freq
    entry=datasets.get((scope,factor,freq))
    if entry is None:
        print("Error #1(get_ff_factors): No data item available for",scope,factor,freq)
        return None
    symbol,seq=entry

    # Imported late so an unknown combination is rejected without the reader
    import pandas_datareader.data as web
    try:
        ds = web.DataReader(symbol,source,start,end)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate
        print("Error #2(get_ff_factors): Server did not respond")
        return None

    # Pick the requested entry out of the reader result
    factor_df=ds[seq]
    if len(factor_df)==0:
        print("Error #3(get_ff_factors): Server returned empty data for",start,end,scope,factor,freq)
        return None

    return factor_df
|
629
|
-
|
630
|
-
|
631
|
-
# Manual smoke test, executed only when the module is run as a script
if __name__=='__main__':
    # Two months of the daily US three-factor data
    ff3_daily=get_ff_factors('2019-05-01','2019-06-30','US','FF3','daily')
|
633
|
-
|
634
|
-
|
635
|
-
#==============================================================================
|
636
|
-
#以下为线性回归函数
|
637
|
-
#==============================================================================
|
638
|
-
def check_reg_sample(X,y):
    """
    Check whether a regression sample is usable.

    Parameters:
        X: explanatory variable(s); must support len() and dropna().
        y: dependent variable; must support len() and dropna().

    Returns:
        True when both are non-empty, free of missing values and of equal
        length; False otherwise. Every problem found is printed, not only
        the first one.
    """
    problems=0

    # Empty samples
    if len(X)==0:
        print("Error #1(check_reg_sample): no obs for independent variable(s)")
        print("Independent variable(s):",X)
        problems+=1
    if len(y)==0:
        print("Error #2(check_reg_sample): no obs for dependent variable")
        print("Dependent variable:",y)
        problems+=1

    # Missing values: rows lost by dropna() are the ones holding them
    x_missing=len(X)-len(X.dropna())
    if x_missing != 0:
        print("Error #3(check_reg_sample): missing value(s) in independent variable(s)")
        print("Missing value(s) in independent variable(s):",x_missing)
        problems+=1
    y_missing=len(y)-len(y.dropna())
    if y_missing != 0:
        print("Error #4(check_reg_sample): missing value(s) in dependent variable")
        print("Missing value(s) in dependent variable:",y_missing)
        problems+=1

    # Both sides must have the same number of observations
    if len(X) != len(y):
        print("Error #5(check_reg_sample): sample numbers of independent/dependent variables not match")
        print("Obs of X and y respectively:",len(X),len(y))
        problems+=1

    return problems==0
|
677
|
-
|
678
|
-
#==============================================================================
|
679
|
-
def linreg(X,y):
    """
    Simple one-variable linear regression y = alpha + beta*X (e.g. CAPM).

    Parameters:
        X: explanatory variable, a one-dimensional series.
        y: dependent variable, a one-dimensional series.
        Neither may contain missing values (checked by check_reg_sample).

    Returns:
        (beta, alpha, r_sqr, p_value, std_err): the first, second, fourth and
        fifth values returned by scipy.stats.linregress, with the third one
        (r) squared. Five None values, after printing a message, when the
        sample fails check_reg_sample.
    """
    # Refuse samples the checker rejects
    if not check_reg_sample(X,y):
        print("Error #1(linreg): invalid sample for regression")
        return None,None,None,None,None

    from scipy import stats
    fit=stats.linregress(X,y)
    beta,alpha,r_value,p_value,std_err=fit

    # Report r squared rather than r itself
    return beta,alpha,r_value**2,p_value,std_err
|
705
|
-
|
706
|
-
# Placeholder for a manual test of linreg; intentionally does nothing
if __name__=='__main__':
    pass
|
708
|
-
|
709
|
-
|
710
|
-
#==============================================================================
|
711
|
-
def sigstars(p_value):
    """
    Turn a p-value into significance stars.

    Parameters:
        p_value: the p-value to classify.

    Returns:
        " " for p >= 0.1, "* " for 0.05 <= p < 0.1, "** " for
        0.01 <= p < 0.05 and "***" for p < 0.01. A value that satisfies none
        of the comparisons (e.g. NaN) yields None.
    """
    # Thresholds from weakest to strongest evidence; first match wins
    bands=((0.1," "),(0.05,"* "),(0.01,"** "))
    for floor,stars in bands:
        if p_value >= floor:
            return stars

    if 0.01 > p_value:
        return "***"

    return None
|
727
|
-
|
728
|
-
#==============================================================================
|
729
|
-
def regparms(results):
    """
    Turn statsmodels regression results into a summary DataFrame.

    Parameters:
        results: fitted regression results exposing the attributes
            params, tvalues and pvalues.

    Returns:
        DataFrame with the columns 'coef', 't_values', 'p_values' and 'sig'
        (significance stars produced by sigstars), combined by an inner
        merge on the index of the three inputs.
    """
    import pandas as pd

    def _single_column(values, label):
        # Wrap one result vector in a one-column frame named `label`.
        frame = pd.DataFrame(values)
        frame.columns = [label]
        return frame

    coef_frame = _single_column(results.params, 'coef')
    t_frame = _single_column(results.tvalues, 't_values')
    p_frame = _single_column(results.pvalues, 'p_values')

    # Significance stars derived from each p-value.
    p_frame['sig'] = p_frame['p_values'].apply(sigstars)

    # Combine the three pieces on their shared index.
    on_index = dict(how='inner', left_index=True, right_index=True)
    coef_and_t = pd.merge(coef_frame, t_frame, **on_index)

    return pd.merge(coef_and_t, p_frame, **on_index)
|
760
|
-
#==============================================================================
|
761
|
-
def smreg(X,y):
    """
    Multiple linear regression, y = a + b1*x1 + b2*x2 + b3*x3, fitted by
    statsmodels OLS with an added constant term.

    Parameters:
        X: explanatory variables; a multi-dimensional array or DataFrame.
        y: dependent variable; a one-dimensional sequence or array.

    Returns:
        The table built by regparms from the fitted results (coefficient,
        t-value, p-value and significance stars per term), or a 5-tuple of
        None when check_reg_sample rejects the sample.

    Note: X and y must not contain NaN/None values.
    """
    # Guard clause: refuse to fit when the sample does not pass validation.
    if not check_reg_sample(X, y):
        print("Error #1(smreg): invalid sample for regression")
        return None, None, None, None, None

    import statsmodels.api as sm

    # Add the intercept column, fit, and summarise the fit.
    design = sm.add_constant(X)
    fitted = sm.OLS(y, design).fit()

    return regparms(fitted)
|
790
|
-
|
791
|
-
if __name__ == "__main__":
    # Placeholder: nothing is executed when the module is run as a script.
    pass
|
793
|
-
#==============================================================================
|
794
|
-
#以下为不涉及股票数据源的公共工具函数
|
795
|
-
#==============================================================================
|
796
|
-
def draw_lines(df,y_label,x_label,axhline_value,axhline_label,title_txt,
               data_label=True):
    """
    Draw a line chart from the contents of a DataFrame.

    Parameters:
        df: DataFrame; one line is drawn per column and the index supplies
            the x-axis positions.
        y_label: y-axis label.
        x_label: x-axis label; no label is set when it equals "".
        axhline_value: y position of the horizontal reference line.
        axhline_label: legend label of the horizontal reference line; the
            line is not drawn when it equals "".
        title_txt: chart title; use "\\n" to split it over several lines.
        data_label: when equal to True, every point is annotated with its
            value rounded to two decimals.

    Returns:
        None. The chart is shown with plt.show().
    """
    import matplotlib.pyplot as plt

    column_names = df.columns.values.tolist()

    for name in column_names:
        plt.plot(df[name], label=name, lw=3)

        # Equality (not truthiness) is kept on purpose so that non-bool
        # arguments behave exactly as before.
        if data_label == True:
            # Put each value slightly above its point.
            for x_pos, value in zip(df.index, df[name]):
                plt.text(x_pos, value + 0.02, str(round(value, 2)),
                         ha='center', va='bottom', fontsize=7)

    # Optional horizontal reference line.
    if axhline_label != "":
        plt.axhline(y=axhline_value, label=axhline_label,
                    color='green', linestyle=':')

    # Axis labels, title, tick rotation and legend.
    plt.ylabel(y_label, fontweight='bold')
    if x_label != "":
        plt.xlabel(x_label, fontweight='bold')
    plt.title(title_txt, fontweight='bold')
    plt.xticks(rotation=45)
    plt.legend(loc='best')
    plt.show()

    return
|
841
|
-
|
842
|
-
if __name__=='__main__':
    # Ad-hoc demo of draw_lines: beta trends with a dotted reference line
    # at 1.0 labelled "market line".
    # The arguments are passed by keyword because the previous positional
    # call followed a different parameter order and put "market line" into
    # y_label, 1.0 into x_label and "Beta coefficient" into axhline_value.
    # NOTE(review): df is not defined in the visible part of this file; it
    # is assumed to exist in the interactive session -- confirm.
    title_txt="Stock Risk \nCAPM Beta Trends"
    draw_lines(df,
               y_label="Beta coefficient",
               x_label="",
               axhline_value=1.0,
               axhline_label="market line",
               title_txt=title_txt)
|
845
|
-
|
846
|
-
|
847
|
-
|
848
|
-
#==============================================================================
|
849
|
-
def save_to_excel(df,filedir,excelfile,sheetname):
    """
    Save a DataFrame to one sheet of an Excel workbook.

    Behaviour:
        - If the folder does not exist, an error is printed and nothing is
          written.
        - If the workbook does not exist, it is created and df is stored in
          the given sheet.
        - If the workbook exists but the sheet does not, the sheet is added
          and the cell values of the existing sheets are written back
          unchanged.
        - If both the workbook and the sheet exist, df is appended after
          the rows already stored in that sheet (only the columns of df
          are kept there).

    Parameters:
        df: DataFrame to store.
        filedir: target folder.
        excelfile: workbook file name without folder, ending in .xls
            or .xlsx.
        sheetname: name of the sheet inside the workbook.

    Returns:
        None.

    Raises:
        KeyError: when the existing target sheet lacks a column of df.

    Note: numbers held as text in df are stored as numbers by Excel, so
    data read back from the workbook can differ in type from df.
    """
    import os

    # Check the folder without changing the process working directory.
    if not os.path.isdir(filedir):
        print("Error #1(save_to_excel): folder does not exist")
        print("Information:",filedir)
        return

    import pandas as pd

    wanted_columns = list(df.columns)
    # Full path of the workbook, used for reading as well as for writing.
    filename = filedir + '/' + excelfile

    # Sheets to write back, in workbook order: (name, frame, is_target).
    pending = []
    target_found = False

    if os.path.isfile(filename):
        with pd.ExcelFile(filename) as book:
            for name in book.sheet_names:
                if name == sheetname:
                    # Existing target sheet: append df below its rows.
                    stored = pd.read_excel(book, sheet_name=name)
                    combined = pd.concat([stored[wanted_columns], df],
                                         axis=0, ignore_index=True)
                    pending.append((name, combined, True))
                    target_found = True
                else:
                    # Other sheets are read as raw cells (no header
                    # parsing) so they can be written back as they are.
                    raw = pd.read_excel(book, sheet_name=name, header=None)
                    pending.append((name, raw, False))

    if not target_found:
        # New workbook or new sheet: df goes into its own sheet at the end.
        pending.append((sheetname, df, True))

    try:
        # The context manager saves and closes the workbook on exit.
        with pd.ExcelWriter(filename) as writer:
            for name, frame, is_target in pending:
                if is_target:
                    frame.to_excel(writer, sheet_name=name,
                                   header=True, index=True)
                else:
                    frame.to_excel(writer, sheet_name=name,
                                   header=False, index=False)
    except OSError:
        print("Error #2(save_to_excel): writing file permission denied")
        print("Information:",filename)
        return

    print("***Results saved in",filename,"@ sheet",sheetname)
    return
|
930
|
-
|
931
|
-
if __name__ == "__main__":
    # Placeholder: nothing is executed when the module is run as a script.
    pass
|
933
|
-
#==============================================================================
|
934
|
-
def gen_yearlist(start_year,end_year):
    """
    Build the list of years from start_year to end_year, both inclusive.

    Parameters:
        start_year: first year; a string or number accepted by int().
        end_year: last year; a string or number accepted by int().

    Returns:
        List of year strings, e.g. ['2010', '2011', '2012']. The list is
        empty whenever start_year is later than end_year.
    """
    first = int(start_year)
    last = int(end_year)

    # A plain integer range yields an empty list for any reversed range.
    return [str(year) for year in range(first, last + 1)]
|
960
|
-
#==============================================================================
|
961
|
-
|