tushare 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,155 @@
1
+ require 'tushare/util'
2
+
3
+ module Tushare
4
+ module Internet
5
+ # 电影票房
6
+ module BoxOffice
7
+ extend Tushare::Util
8
+ extend self
9
+
10
# Fetch real-time movie box-office data.
# Source: EBOT (艺恩) box-office database.
#
# Returns nil when the service sends an empty response, otherwise an
# Array of Hashes with:
#   BoxOffice    real-time box office (10k CNY)
#   Irank        rank
#   MovieName    movie title
#   boxPer       share of total box office (%)
#   movieDay     days since release
#   sumBoxOffice cumulative box office (10k CNY)
#   time         timestamp when the data was fetched
def realtime_boxoffice
  url = format(MOVIE_BOX, P_TYPE['http'], DOMAINS['mbox'], BOX, _random)
  resp = HTTParty.get url
  # A body shorter than 15 bytes means the endpoint returned no data.
  return nil if resp.body.length < 15
  # Parse the body explicitly (consistent with day_cinema); passing the
  # HTTParty::Response object itself relies on implicit string coercion.
  json = JSON.parse resp.body
  now = Time.now
  result = []
  # Array() guards against a missing 'data2' key in a malformed payload.
  Array(json['data2']).each do |object|
    object.delete 'MovieImg'
    object.delete 'mId'
    object['time'] = now
    result << object
  end
  result
end
37
+
38
# Fetch single-day movie box-office data.
# Source: EBOT (艺恩) box-office database.
#
# date - "YYYY-MM-DD" string; defaults to the previous day when nil.
#
# Returns nil when the service sends an empty response, otherwise an
# Array of Hashes with:
#   AvgPrice      average ticket price
#   AvpPeoPle     average audience per screening
#   BoxOffice     single-day box office (10k CNY)
#   BoxOffice_Up  day-over-day change (%)
#   IRank         rank
#   MovieDay      days since release
#   MovieName     movie title
#   SumBoxOffice  cumulative box office (10k CNY)
#   WomIndex      word-of-mouth index
def day_boxoffice(date = nil)
  # The API takes an offset in days counted back from today
  # (0 == the most recent day with data).
  offset = date.nil? ? 0 : Date.today - Date.parse(date) + 1
  url = format(BOXOFFICE_DAY, P_TYPE['http'], DOMAINS['mbox'], BOX, offset,
               _random)
  resp = HTTParty.get url
  return nil if resp.body.length < 15
  # Parse the body explicitly for consistency with day_cinema.
  json = JSON.parse resp.body
  result = []
  Array(json['data1']).each do |object|
    %w(MovieImg BoxOffice1 MovieID Director IRank_pro).each do |key|
      object.delete key
    end
    result << object
  end
  result
end
75
+
76
# Fetch single-month movie box-office data.
# Source: EBOT (艺恩) box-office database.
#
# date - "YYYY-MM" string; defaults to the previous month when nil.
#
# Returns nil when the service sends an empty response, otherwise an
# Array of Hashes with:
#   Irank         rank
#   MovieName     movie title
#   WomIndex      word-of-mouth index
#   avgboxoffice  average ticket price
#   avgshowcount  average audience per screening
#   box_pro       share of the month's total (%)
#   boxoffice     monthly box office (10k CNY)
#   days          days on screen within the month
#   releaseTime   release date
def month_boxoffice(date = nil)
  date = Date.today.prev_month.strftime('%Y-%m') if date.nil?
  # Upstream expects "YYYY-MM"; anything longer is malformed input.
  raise BOX_INPUT_ERR_MSG if date.length > 8
  # The endpoint wants a full date, so pin it to the first of the month.
  date += '-01'
  url = format(BOXOFFICE_MONTH, P_TYPE['http'], DOMAINS['mbox'], BOX,
               date)
  resp = HTTParty.get url
  return nil if resp.body.length < 15
  # Parse the body explicitly for consistency with day_cinema.
  json = JSON.parse resp.body
  result = []
  Array(json['data1']).each do |object|
    %w(defaultImage EnMovieID).each { |key| object.delete key }
    result << object
  end
  result
end
111
+
112
# Per-cinema single-day box-office ranking.
# Source: EBOT (艺恩) box-office database.
#
# date - "YYYY-MM-DD" string; defaults to yesterday when nil.
#
# Returns an Array of Hashes with:
#   Attendance          seat occupancy rate
#   AvgPeople           average audience per screening
#   CinemaName          cinema name
#   RowNum              rank
#   TodayAudienceCount  audience count for the day
#   TodayBox            box office for the day
#   TodayShowCount      screenings for the day
#   price               average ticket price (CNY)
def day_cinema(date = nil)
  date = Date.today.prev_day.strftime('%F') if date.nil?
  _write_head
  # The ranking is spread over at most 11 pages; empty pages are skipped.
  (1..11).each_with_object([]) do |page, rows|
    url = format(BOXOFFICE_CBD, P_TYPE['http'], DOMAINS['mbox'], BOX,
                 page, date)
    resp = HTTParty.get url
    next if resp.body.length < 15
    JSON.parse(resp.body)['data1'].each do |record|
      record.delete 'CinemaID'
      rows << record
    end
  end
end
145
+
146
+ private
147
+
148
# Return a random integer with exactly +n+ digits (default 13), used as
# a cache-busting query-string parameter.
def _random(n = 13)
  lower = 10**(n - 1)
  upper = (10**n) - 1
  rand(lower..upper)
end
153
+ end
154
+ end
155
+ end
@@ -0,0 +1,197 @@
1
+ require 'tushare/util'
2
+
3
+ module Tushare
4
+ module Stock
5
+ # 龙虎榜数据
6
+ module Billboard
7
+ extend Tushare::Util
8
+ extend self
9
+
10
+ # 获取每日龙虎榜列表
11
+ # Parameters
12
+ # --------
13
+ # date:string
14
+ # 明细数据日期 format:YYYY-MM-DD 如果为空,返回最近一个交易日的数据
15
+
16
+ # Return
17
+ # ------
18
+ # DataFrame
19
+ # code:代码
20
+ # name :名称
21
+ # pchange:涨跌幅
22
+ # amount:龙虎榜成交额(万)
23
+ # buy:买入额(万)
24
+ # bratio:占总成交比例
25
+ # sell:卖出额(万)
26
+ # sratio :占总成交比例
27
+ # reason:上榜原因
28
+ # date :日期
29
+ def top_list(date = Time.now.strftime('%F'))
30
+ # TODO: 判断日期是否合法
31
+ url = format(LHB_URL, P_TYPE['http'], DOMAINS['em'], date, date)
32
+ resp = HTTParty.get(url)
33
+ string = resp.body.encode('utf-8', 'gbk').split('_1=').last
34
+ json = JSON.parse(string)
35
+ result = []
36
+ json['data'].each do |datum|
37
+ object = {}
38
+ LHB_COLS.each_with_index do |key, index|
39
+ object[key] = datum[LHB_TMP_COLS[index]]
40
+ if %w(buy sell amount Turnover).include? key
41
+ object[key] = (object[key].to_f / 10000).round(2)
42
+ end
43
+ end
44
+ object['bration'] = (object['buy'] / object['Turnover']).round(2)
45
+ object['sratio'] = (object['sell'] /object['Turnover']).round(2)
46
+ object['date'] = date
47
+ object.delete('Turnover')
48
+ result << object
49
+ end
50
+ result
51
+ end
52
+
53
+ # 获取个股上榜统计数据
54
+ # Parameters
55
+ # --------
56
+ # days:int
57
+ # 天数,统计n天以来上榜次数,默认为5天,其余是10、30、60
58
+ # retry_count : int, 默认 3
59
+ # 如遇网络等问题重复执行的次数
60
+ # pause : int, 默认 0
61
+ # 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
62
+ # Return
63
+ # ------
64
+ # DataFrame
65
+ # code:代码
66
+ # name:名称
67
+ # count:上榜次数
68
+ # bamount:累积购买额(万)
69
+ # samount:累积卖出额(万)
70
+ # net:净额(万)
71
+ # bcount:买入席位数
72
+ # scount:卖出席位数
73
+ def cap_tops(days = 5)
74
+ _check_lhb_input(days)
75
+ get_data(1, LHB_GGTJ_COLS, lambda do |p|
76
+ format(LHB_SINA_URL, P_TYPE['http'], DOMAINS['vsf'], LHB_KINDS[0],
77
+ PAGES['fd'], days, p)
78
+ end)
79
+ end
80
+
81
+ # 获取营业部上榜统计数据
82
+ # Parameters
83
+ # --------
84
+ # days:int
85
+ # 天数,统计n天以来上榜次数,默认为5天,其余是10、30、60
86
+ # retry_count : int, 默认 3
87
+ # 如遇网络等问题重复执行的次数
88
+ # pause : int, 默认 0
89
+ # 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
90
+ # Return
91
+ # ---------
92
+ # broker:营业部名称
93
+ # count:上榜次数
94
+ # bamount:累积购买额(万)
95
+ # bcount:买入席位数
96
+ # samount:累积卖出额(万)
97
+ # scount:卖出席位数
98
+ # top3:买入前三股票
99
+ def broker_tops(days = 5)
100
+ _check_lhb_input(days)
101
+ get_data(1, LHB_YYTJ_COLS, lambda do |p|
102
+ format(LHB_SINA_URL, P_TYPE['http'], DOMAINS['vsf'], LHB_KINDS[1],
103
+ PAGES['fd'], days, p)
104
+ end)
105
+ end
106
+
107
+ # 获取机构席位追踪统计数据
108
+ # Parameters
109
+ # --------
110
+ # days:int
111
+ # 天数,统计n天以来上榜次数,默认为5天,其余是10、30、60
112
+ # retry_count : int, 默认 3
113
+ # 如遇网络等问题重复执行的次数
114
+ # pause : int, 默认 0
115
+ # 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
116
+
117
+ # Return
118
+ # --------
119
+ # code:代码
120
+ # name:名称
121
+ # bamount:累积买入额(万)
122
+ # bcount:买入次数
123
+ # samount:累积卖出额(万)
124
+ # scount:卖出次数
125
+ # net:净额(万)
126
+ def inst_tops(days = 5)
127
+ _check_lhb_input(days)
128
+ get_data(
129
+ 1,
130
+ LHB_JGZZ_COLS,
131
+ lambda do |p|
132
+ format(LHB_SINA_URL, P_TYPE['http'], DOMAINS['vsf'], LHB_KINDS[2],
133
+ PAGES['fd'], days, p)
134
+ end,
135
+ lambda do |tds|
136
+ tds = tds.to_a
137
+ tds[0..1].concat tds[4..8]
138
+ end)
139
+ end
140
+
141
+ # 获取最近一个交易日机构席位成交明细统计数据
142
+ # Parameters
143
+ # --------
144
+ # retry_count : int, 默认 3
145
+ # 如遇网络等问题重复执行的次数
146
+ # pause : int, 默认 0
147
+ # 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
148
+
149
+ # Return
150
+ # ----------
151
+ # code:股票代码
152
+ # name:股票名称
153
+ # date:交易日期
154
+ # bamount:机构席位买入额(万)
155
+ # samount:机构席位卖出额(万)
156
+ # type:类型
157
+ def inst_detail
158
+ get_data(
159
+ 1,
160
+ LHB_JGMX_COLS,
161
+ lambda do |p|
162
+ format(LHB_SINA_URL, P_TYPE['http'], DOMAINS['vsf'], LHB_KINDS[3],
163
+ PAGES['fd'], '', p)
164
+ end)
165
+ end
166
+
167
+ private
168
+
169
# Scrape a paginated Sina dragon-tiger HTML table into an Array of Hashes.
#
# page            - first page number to fetch; pagination continues until
#                   the pager's "next" link is rendered disabled.
# headers         - column names, positionally matched against <td> cells.
# url_generator   - lambda(page) -> URL string for that page.
# td_preprocessor - optional lambda that filters/reorders the <td> node set
#                   before it is zipped with +headers+.
#
# Returns one Hash per table row, accumulated across all pages.
def get_data(page, headers, url_generator, td_preprocessor = nil)
  _write_head
  result = []
  loop do
    url = url_generator.call(page)
    _write_console
    # NOTE(review): Kernel#open on a URL requires open-uri to be loaded and
    # would also open local paths — confirm open-uri is required elsewhere
    # in the gem. Pages are GBK-encoded, hence the explicit encoding arg.
    doc = Nokogiri::HTML(open(url), nil, 'gbk')
    doc.css('table#dataTable > tr').each do |tr|
      item = {}
      tds = tr.css('td')
      tds = td_preprocessor.call(tds) if td_preprocessor
      tds.each_with_index do |td, index|
        # Cells beyond the known column list are ignored.
        item[headers[index]] = td.content if headers[index]
      end
      result << item
    end
    # Stop when the last pager anchor carries the "nolink" (disabled) class.
    # NOTE(review): ':last' is a jQuery pseudo-class, not standard CSS —
    # verify Nokogiri accepts it here.
    next_page = doc.css('div.pages > a:last').css('a.nolink')
    break unless next_page.empty?
    page += 1
  end
  result
end
191
+
192
# Validate the lookback window; the upstream service only supports
# 5-, 10-, 30- and 60-day statistics.
def _check_lhb_input(last)
  return if [5, 10, 30, 60].include?(last)
  raise LHB_MSG
end
195
+ end
196
+ end
197
+ end
@@ -0,0 +1,288 @@
1
+ require 'tushare/util'
2
+
3
+ module Tushare
4
+ module Stock
5
+ # 获取股票分类数据接口
6
+ module Classifying
7
+ extend Tushare::Util
8
+ extend self
9
+
10
# Industry classification data.
#
# standard - 'sina' (Sina industries, default) or 'sw' (Shenwan industries).
#
# Returns an Array of Hashes with:
#   code   stock code
#   name   stock name
#   c_name industry name
def industry_classified(standard = 'sina')
  _write_head
  page = standard == 'sw' ? PAGES['ids_sw'] : PAGES['ids']
  url = format(SINA_INDUSTRY_INDEX_URL, P_TYPE['http'], DOMAINS['vsf'],
               page)
  fetch_type_data(url).flat_map do |type, type_name|
    fetch_detail(type, type_name)
  end
end
33
+
34
# Concept (theme) classification data.
#
# Returns an Array of Hashes with:
#   code   stock code
#   name   stock name
#   c_name concept name
def concept_classified
  _write_head
  url = format(SINA_CONCEPTS_INDEX_URL, P_TYPE['http'], DOMAINS['sf'],
               PAGES['cpt'])
  fetch_type_data(url).flat_map do |type, type_name|
    fetch_detail(type, type_name)
  end
end
52
+
53
# Region classification data, sorted by region name.
#
# Returns an Array of Hashes with:
#   code stock code
#   name stock name
#   area region name
def area_classified
  rows = Tushare::Stock::Fundamental.get_stock_basics.map do |basic|
    { 'code' => basic['code'],
      'name' => basic['name'],
      'area' => basic['area'] }
  end
  # Guard against nil areas so the sort never compares nil with String.
  rows.sort_by { |row| row['area'] || '' }
end
70
+
71
# ChiNext (growth-enterprise) board stocks, sorted by code.
#
# Returns an Array of Hashes with:
#   code stock code
#   name stock name
def gem_classified
  basics = Tushare::Stock::Fundamental.get_stock_basics
  rows = basics
         .select { |basic| basic['code'][0] == '3' }
         .map { |basic| { 'code' => basic['code'], 'name' => basic['name'] } }
  rows.sort_by { |row| row['code'] || '' }
end
86
+
87
# SME (small & medium enterprise) board stocks, sorted by code.
#
# Returns an Array of Hashes with:
#   code stock code
#   name stock name
def sme_classified
  basics = Tushare::Stock::Fundamental.get_stock_basics
  rows = basics
         .select { |basic| basic['code'][0..2] == '002' }
         .map { |basic| { 'code' => basic['code'], 'name' => basic['name'] } }
  rows.sort_by { |row| row['code'] || '' }
end
102
+
103
# Special-treatment (risk-warning, "ST") stocks, sorted by code.
#
# Returns an Array of Hashes with:
#   code stock code
#   name stock name
def st_classified
  basics = Tushare::Stock::Fundamental.get_stock_basics
  rows = basics
         .select { |basic| basic['name'].include? 'ST' }
         .map { |basic| { 'code' => basic['code'], 'name' => basic['name'] } }
  rows.sort_by { |row| row['code'] || '' }
end
118
+
119
# Current CSI 300 (沪深300) constituents and their index weights.
#
# Returns an Array of Hashes with:
#   code   stock code
#   name   stock name (looked up from the stock basics list)
#   date   date
#   weight index weight
def hs300s
  url = format(HS300_CLASSIFY_URL_FTP, P_TYPE['ftp'], DOMAINS['idxip'],
               PAGES['hs300w'])
  fetch_ftp_file url do |file|
    xls = ::Roo::Spreadsheet.open(file, extension: 'xls')
    result = []
    # Row layout per the exchange's xls: col 0 date, col 3 code, col 6 weight.
    xls.sheet(0).drop(1).each do |row|
      result << { 'date' => row[0],
                  'code' => row[3],
                  'weight' => row[6] }
    end
    basics = Tushare::Stock::Fundamental.get_stock_basics
    result.each do |object|
      basic = basics.find { |b| b['code'] == object['code'] }
      # Fix: the hash key must be the string 'name' — the previous bare
      # `name` called Module#name and produced a bogus key. Also skip codes
      # missing from the basics list instead of crashing on nil.
      object['name'] = basic['name'] if basic
    end
    result
  end
end
146
+
147
# Current SSE 50 (上证50) constituents.
#
# Returns an Array of Hashes with:
#   code stock code
#   name stock name
def sz50s
  url = format(HS300_CLASSIFY_URL_FTP, P_TYPE['ftp'], DOMAINS['idxip'],
               PAGES['sz50b'])
  fetch_ftp_file url do |file|
    sheet = ::Roo::Spreadsheet.open(file, extension: 'xls').sheet(0)
    # Skip the header row; col 0 is the code, col 1 the name.
    sheet.drop(1).map { |row| { 'code' => row[0], 'name' => row[1] } }
  end
end
166
+
167
# Current CSI 500 (中证500) constituents and their index weights.
#
# Returns an Array of Hashes with:
#   code   stock code
#   name   stock name (looked up from the stock basics list)
#   date   date
#   weight index weight
def zz500s
  url = format(HS300_CLASSIFY_URL_FTP, P_TYPE['ftp'], DOMAINS['idxip'],
               PAGES['zz500wt'])
  fetch_ftp_file url do |file|
    xls = ::Roo::Spreadsheet.open(file, extension: 'xls')
    result = []
    # Row layout per the exchange's xls: col 0 date, col 3 code, col 6 weight.
    xls.sheet(0).drop(1).each do |row|
      result << { 'date' => row[0],
                  'code' => row[3],
                  'weight' => row[6] }
    end
    basics = Tushare::Stock::Fundamental.get_stock_basics
    result.each do |object|
      basic = basics.find { |b| b['code'] == object['code'] }
      # Fix: use the string key 'name' — the previous bare `name` called
      # Module#name and produced a bogus key. Skip codes missing from the
      # basics list instead of crashing on nil.
      object['name'] = basic['name'] if basic
    end
    # Return the rows explicitly (mirrors hs300s).
    result
  end
end
191
+
192
# List of delisted (terminated) stocks from the SSE query service.
#
# Returns an Array of Hashes with:
#   code  stock code
#   name  stock name
#   oDate listing date
#   tDate delisting date
def terminated
  ref = format(SSEQ_CQ_REF_URL, P_TYPE['http'], DOMAINS['sse'])
  url = format(TERMINATED_URL, P_TYPE['http'], DOMAINS['sseq'],
               PAGES['ssecq'], _random(5), _random)
  # The SSE endpoint requires a Referer header and session cookies.
  resp = HTTParty.get(url,
                      headers: { 'Referer' => ref },
                      cookies: MAR_SH_COOKIES)
  # Strip the JSONP wrapper ("jsonpCallbackNNN(...)") before parsing.
  payload = resp.body.sub(/jsonpCallback\d+\(/, '')[0..-2]
  page = JSON.parse(payload)['pageHelp']
  page['data'].map do |datum|
    TERMINATED_T_COLS.each_with_index.with_object({}) do |(key, index), object|
      object[TERMINATED_COLS[index]] = datum[key]
    end
  end
end
220
+
221
# List of suspended-listing stocks from the SSE query service.
#
# Returns an Array of Hashes with:
#   code  stock code
#   name  stock name
#   oDate listing date
#   tDate suspension date
def suspended
  ref = format(SSEQ_CQ_REF_URL, P_TYPE['http'], DOMAINS['sse'])
  url = format(SUSPENDED_URL, P_TYPE['http'], DOMAINS['sseq'],
               PAGES['ssecq'], _random(5), _random)
  # The SSE endpoint requires a Referer header and session cookies.
  resp = HTTParty.get(url,
                      headers: { 'Referer' => ref },
                      cookies: MAR_SH_COOKIES)
  # Strip the JSONP wrapper ("jsonpCallbackNNN(...)") before parsing.
  payload = resp.body.sub(/jsonpCallback\d+\(/, '')[0..-2]
  page = JSON.parse(payload)['pageHelp']
  page['data'].map do |datum|
    TERMINATED_T_COLS.each_with_index.with_object({}) do |(key, index), object|
      object[TERMINATED_COLS[index]] = datum[key]
    end
  end
end
249
+
250
+ private
251
+
252
# Download and parse one Sina classification detail page.
#
# type      - classification id interpolated into the query URL.
# type_name - human-readable classification name, stored under 'c_name'.
#
# Returns an Array of Hashes containing the FOR_CLASSIFY_B_COLS fields
# plus 'c_name'.
def fetch_detail(type, type_name)
  _write_console
  url = format(SINA_DATA_DETAIL_URL, P_TYPE['http'], DOMAINS['vsf'],
               PAGES['jv'], type)
  resp = HTTParty.get(url)
  # The endpoint serves GBK-encoded JavaScript with unquoted object keys.
  # The gsub chain quotes those keys (and patches the special 'symbol'
  # edge cases the first gsub mangles) so JSON.parse can handle the text.
  json = JSON.parse(resp.body.encode('utf-8', 'gbk').gsub(/,(.*?):/) { |s| ",\"#{s[1..-2]}\":" }.gsub('"{symbol', '{"symbol').gsub('{symbol', '{"symbol"'))
  result = []
  json.each do |row|
    object = {}
    FOR_CLASSIFY_B_COLS.each do |field|
      object[field] = row[field]
    end
    object['c_name'] = type_name
    result << object
  end
  result
end
269
+
270
# Fetch a Sina classification index and map type ids to display names.
#
# url - index endpoint returning GBK-encoded "var xxx = {...}" JavaScript.
#
# Returns a Hash of { type_id => type_name }.
def fetch_type_data(url)
  resp = HTTParty.get(url)
  # Drop the "var xxx =" prefix and re-encode the GBK payload.
  body = resp.body.encode('utf-8', 'gbk').split('=').last
  JSON.parse(body).values.each_with_object({}) do |row, mapping|
    # Each value is a CSV pair: "<type_id>,<type_name>,...".
    type_id, type_name = row.split(',')
    mapping[type_id] = type_name
  end
end
280
+
281
# Return a random n-digit number as a String (default 13 digits), used
# as a cache-busting query-string parameter.
def _random(n = 13)
  lower = 10**(n - 1)
  upper = (10**n) - 1
  rand(lower..upper).to_s
end
286
+ end
287
+ end
288
+ end