finmind 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- FinMind/__init__.py +2 -0
- FinMind/_version.py +6 -0
- FinMind/crawler/__init__.py +2 -0
- FinMind/crawler/base.py +165 -0
- FinMind/crawler/commodities.py +153 -0
- FinMind/crawler/demo.py +32 -0
- FinMind/crawler/government_bonds.py +207 -0
- FinMind/data/__init__.py +5 -0
- FinMind/data/data_loader.py +2246 -0
- FinMind/data/data_subscriber.py +116 -0
- FinMind/data/finmind_api.py +342 -0
- FinMind/indicators/__init__.py +29 -0
- FinMind/indicators/bias.py +30 -0
- FinMind/indicators/continue_holding.py +14 -0
- FinMind/indicators/institutional_investors_follower.py +76 -0
- FinMind/indicators/institutional_investors_over_buy.py +56 -0
- FinMind/indicators/kd.py +34 -0
- FinMind/indicators/kd_crossover.py +62 -0
- FinMind/indicators/ma_cross_orver.py +69 -0
- FinMind/indicators/short_sale_margin_purchase_ratio.py +51 -0
- FinMind/plotting/__init__.py +4 -0
- FinMind/plotting/bar.py +65 -0
- FinMind/plotting/kline.py +402 -0
- FinMind/plotting/line.py +64 -0
- FinMind/plotting/pie.py +53 -0
- FinMind/schema/__init__.py +6 -0
- FinMind/schema/data.py +151 -0
- FinMind/schema/indicators.py +94 -0
- FinMind/schema/info.py +40 -0
- FinMind/schema/plot.py +56 -0
- FinMind/schema/rule.py +7 -0
- FinMind/strategies/__init__.py +29 -0
- FinMind/strategies/base.py +791 -0
- FinMind/strategies/bias.py +39 -0
- FinMind/strategies/continue_holding.py +22 -0
- FinMind/strategies/institutional_investors_follower.py +85 -0
- FinMind/strategies/kd.py +32 -0
- FinMind/strategies/kd_crossover.py +26 -0
- FinMind/strategies/ma_crossover.py +35 -0
- FinMind/strategies/macd_crossover.py +46 -0
- FinMind/strategies/max_min_period_bias.py +62 -0
- FinMind/strategies/naive_kd.py +39 -0
- FinMind/strategies/short_sale_margin_purchase_ratio.py +111 -0
- FinMind/strategies/utils.py +50 -0
- FinMind/templates/post.html +27 -0
- FinMind/utility/__init__.py +0 -0
- FinMind/utility/request.py +111 -0
- FinMind/utility/rule.py +20 -0
- finmind-1.8.0.dist-info/LICENSE +201 -0
- finmind-1.8.0.dist-info/METADATA +176 -0
- finmind-1.8.0.dist-info/RECORD +53 -0
- finmind-1.8.0.dist-info/WHEEL +5 -0
- finmind-1.8.0.dist-info/top_level.txt +1 -0
FinMind/__init__.py
ADDED
FinMind/_version.py
ADDED
FinMind/crawler/base.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import time
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import requests
|
|
6
|
+
|
|
7
|
+
USER_AGENT = (
|
|
8
|
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
|
|
9
|
+
"Chrome/68.0.3440.84 Safari/537.36"
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def requests_get(url, header):
    """Send a GET request with a 10 second timeout.

    TLS certificate verification is turned off (``verify=False``).

    Args:
        url: Address to fetch.
        header: HTTP headers passed to ``requests.get``.

    Returns:
        The response object on success. Every exception is swallowed and
        ``None`` is returned instead; when the error text mentions exhausted
        retries or a read timeout, the function sleeps 60 seconds first.
    """
    try:
        return requests.get(url, verify=False, timeout=10, headers=header)
    except Exception as e:
        message = str(e)
        # Back off for a minute before handing control back to the caller.
        if "Max retries exceeded" in message or "Read timed out" in message:
            time.sleep(60)
    return None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def requests_post(url, header, form_data):
    """Send a POST request with a 10 second timeout.

    TLS certificate verification is turned off (``verify=False``).

    Args:
        url: Address to post to.
        header: HTTP headers passed to ``requests.post``.
        form_data: Payload passed as the ``data`` argument.

    Returns:
        The response object returned by ``requests.post``.
    """
    try:
        res = requests.post(
            url, verify=False, timeout=10, headers=header, data=form_data
        )
        return res
    except Exception as e:
        # Wait a minute when the failure looks like exhausted retries or a
        # read timeout.
        if "Max retries exceeded" in str(e) or "Read timed out" in str(e):
            time.sleep(60)
        # NOTE(review): assumed to re-raise for every exception (not only
        # after the sleep) - confirm the intended nesting.
        raise
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_time():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    now = time.localtime(time.time())
    return time.strftime("%Y-%m-%d %H:%M:%S", now)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class BaseCrawler:
    """Date/time conversion and data-cleaning helpers shared by crawlers."""

    @staticmethod
    def _two_digits(number):
        """Format an int as at least two digits; negatives become ``"00"``."""
        return "{:02d}".format(max(number, 0))

    @staticmethod
    def date2days(date):
        """Convert a ``YYYY-MM-DD`` string to milliseconds since 1970-01-01.

        Despite the name, the whole-day offset from the epoch is returned in
        milliseconds, e.g. ``"1970-01-02"`` -> ``86400000``.
        """
        day = datetime.datetime.strptime(date, "%Y-%m-%d").date()
        elapsed_days = (day - datetime.date(1970, 1, 1)).days
        return elapsed_days * 60 * 60 * 24 * 1000

    @staticmethod
    def days2date(day):
        """Convert milliseconds since 1970-01-01 to a ``datetime.date``.

        Args:
            day: Millisecond count (int or numeric string),
                e.g. ``631497600000``.
        """
        day = int(day)
        # ms -> s -> min -> hr -> whole days (truncated)
        day = int(day / 1000 / 60 / 60 / 24)
        return datetime.date(1970, 1, 1) + datetime.timedelta(days=day)

    def millisecond2date(self, ms):
        """Convert epoch milliseconds to ``YYYY-MM-DD HH:MM:00``.

        The minute is shifted back by one (``date2millisecond`` shifts it
        forward by one); a resulting negative minute is clamped to ``00``
        without borrowing from the hour. Seconds are always ``00``.

        Args:
            ms: Millisecond count, e.g. ``1559489350000``.
        """
        ms = int(ms)
        date = str(self.days2date(ms))
        days = int(ms / 1000 / 60 / 60 / 24)

        # Seconds elapsed since midnight of that day.
        second = ms / 1000 - days * 60 * 60 * 24
        hour = int(second / 60 / 60)
        minute = int((second - hour * 60 * 60) / 60) - 1

        return "{} {}:{}:00".format(
            date, self._two_digits(hour), self._two_digits(minute)
        )

    def millisecond2date2(self, ms):
        """Convert epoch milliseconds to ``YYYY-MM-DD HH:MM:SS``.

        Unlike ``millisecond2date`` no minute shift is applied and the
        seconds are kept.

        Args:
            ms: Millisecond count, e.g. ``1566898740000``.
        """
        ms = int(ms)
        date = str(self.days2date(ms))
        days = int(ms / 1000 / 60 / 60 / 24)

        # Seconds elapsed since midnight of that day.
        second = ms / 1000 - days * 60 * 60 * 24
        hour = int(second / 60 / 60)
        minute = int((second - hour * 60 * 60) / 60)
        second = int(second - hour * 60 * 60 - minute * 60)

        if second < 10:
            second = "0{}".format(second)
        return "{} {}:{}:{}".format(
            date, self._two_digits(hour), self._two_digits(minute), second
        )

    @staticmethod
    def date2millisecond(date):
        """Convert ``YYYY-MM-DD HH:MM:SS`` to seconds since the epoch.

        One minute is added before converting. Despite the name the result
        is in seconds, not milliseconds.

        Args:
            date: Timestamp string, e.g. ``'2019-06-02 15:30:00'``.
        """
        moment = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
        moment = moment + datetime.timedelta(minutes=1)
        elapsed = moment - datetime.datetime(1970, 1, 1, 0, 0, 0)
        return elapsed.days * 24 * 60 * 60 + elapsed.seconds

    @staticmethod
    def create_date(start, today=False):
        """List the dates after ``start`` up to yesterday (or today).

        Args:
            start: ``YYYY-MM-DD`` string, e.g. ``'2018-07-31'``; the list
                begins on the following day.
            today: When true, the current date is included as last element.

        Returns:
            List of ``YYYY-MM-DD`` strings; empty when ``start`` is not in
            the past.
        """
        first = datetime.datetime.strptime(
            start, "%Y-%m-%d"
        ).date() + datetime.timedelta(days=1)
        end = datetime.date.today()

        day_len = (end - first).days
        if today:
            day_len += 1
        return [
            str(first + datetime.timedelta(days=offset))
            for offset in range(day_len)
        ]

    @staticmethod
    def remove_outlier(data, var_name):
        """Drop rows whose ``var_name`` value lies 5x the mean or more away from zero.

        Nothing is removed when the standard deviation of the column is
        below 1.

        Args:
            data: Table supporting ``data[var_name]`` and boolean-list
                row selection (e.g. a pandas DataFrame).
            var_name: Name of the column to inspect.

        Returns:
            ``data`` restricted to rows with ``-5*|mean| < value < 5*|mean|``.
        """
        value = list(data[var_name])
        mean = np.mean(value, axis=0)
        sd = np.std(value, axis=0)
        if sd < 1:
            return data

        # Use the magnitude of the mean: with a negative mean the interval
        # (−5·mean, 5·mean) would be empty and every row would be dropped.
        bound = 5 * abs(mean)
        keep = [bound > x > -bound for x in value]
        return data[keep]
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def change_chinese_date_us(d):
    """Convert a ``Y/M/D`` string to ``datetime.date``, adding 1911 to the year.

    Args:
        d: Slash-separated year/month/day, e.g. ``"108/01/02"``.

    Returns:
        The date with the year offset by 1911, e.g. 2019-01-02.
    """
    offset_year, month, day = map(int, d.split("/"))
    return datetime.date(offset_year + 1911, month, day)
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
import requests
|
|
8
|
+
from lxml import etree
|
|
9
|
+
|
|
10
|
+
from FinMind.crawler.base import BaseCrawler, USER_AGENT
|
|
11
|
+
|
|
12
|
+
# Absolute path of this file with its last two "/"-separated components
# (file name and containing directory) removed; appended to the module
# search path at import time.
PATH = "/".join(os.path.abspath(__file__).split("/")[:-2])
sys.path.append(PATH)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class CommoditiesCrawler(BaseCrawler):
    """Crawler for daily commodity history served by www.investing.com."""

    def __init__(self):
        super(CommoditiesCrawler, self).__init__()

    @staticmethod
    def create_loop_list():
        """Collect ``[data_id, data_name]`` pairs for all commodity kinds.

        Each kind's index page is fetched and scanned for
        ``data-name="..." data-id="..."`` attribute pairs.

        Returns:
            List of two-element lists of strings, e.g.
            ``['49769', 'Brent Oil Futures']``.
        """
        kind_list = ["meats", "grains", "energies", "softs", "metals"]
        # The headers do not depend on the kind, so build them once.
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Host": "www.investing.com",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": USER_AGENT,
        }
        loop_list = []
        for kind in kind_list:
            print(kind)
            index_url = "https://www.investing.com/commodities/" + kind
            res = requests.get(index_url, verify=True, headers=headers)

            tem = re.findall(
                ' data-name="[A-Za-z ]+" data-id="[0-9]+" ', res.text
            )
            futures_id_list = [re.findall("[0-9]+", te)[0] for te in tem]
            data_name_list = [
                re.findall('"[A-Za-z ]+"', te)[0].replace('"', "") for te in tem
            ]
            loop_list.extend(
                [futures_id, data_name]
                for futures_id, data_name in zip(
                    futures_id_list, data_name_list
                )
            )
        return loop_list

    @staticmethod
    def get_end_date():
        """Return yesterday's date formatted as ``MM/DD/YYYY``."""
        end_date = datetime.datetime.now().date() + datetime.timedelta(-1)
        return end_date.strftime("%m/%d/%Y")

    def crawler(self, loop):
        """Download and tabulate the daily history of one instrument.

        Args:
            loop: ``[data_id, data_name]`` pair as produced by
                ``create_loop_list``, e.g. ``['49769', 'Brent Oil Futures']``.

        Returns:
            A pandas DataFrame sorted by ``date`` with a fresh 0..n-1 index,
            columns named after the response's table headers plus ``name``;
            an empty DataFrame when no rows were parsed.
        """

        def get_value(template):
            """Turn the seven ``<td>`` cells of one row into a 1-row frame."""
            # The first cell carries the date in epoch seconds.
            date = int(template[0].attrib["data-real-value"])
            date = int(date / 60 / 60 / 24)
            date = str(
                datetime.date(1970, 1, 1) + datetime.timedelta(days=date)
            )
            v = [
                float(
                    template[template_index]
                    .attrib["data-real-value"]
                    .replace(",", "")
                )
                for template_index in range(1, 6)
            ]
            _price, _open, _high, _low, _vol = v

            # The last cell is a percentage string; store it as a fraction.
            change = (
                float(template[6].text.replace("%", "").replace(",", "")) / 100
            )

            return pd.DataFrame(
                [date, _price, _open, _high, _low, _vol, change]
            ).T

        # -------------------------------------------------------------------
        futures_id, data_name = loop
        header = data_name + " Historical data"
        st_date, end_date = (
            "01/01/1970",
            self.get_end_date(),
        )

        bonds_url = "https://www.investing.com/instruments/HistoricalDataAjax"
        form_data = {
            "curr_id": futures_id,
            "header": header,
            "st_date": st_date,
            "end_date": end_date,
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data",
        }

        headers = {
            "Accept": "text/plain, */*; q=0.01",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7",
            "Connection": "keep-alive",
            "Content-Length": "183",
            "Content-Type": "application/x-www-form-urlencoded",
            "Host": "www.investing.com",
            "Origin": "https://www.investing.com",
            "Referer": "https://www.investing.com/commodities/brent-oil-historical-data",
            "User-Agent": USER_AGENT,
            "X-Requested-With": "XMLHttpRequest",
        }
        print("requests post")
        res = requests.post(
            bonds_url, verify=True, headers=headers, data=form_data
        )
        print("data clean")
        page = etree.HTML(res.text)
        col_name = page.xpath("//tr//th")

        col_name = [
            c.text.replace(" %", "Percent").replace(".", "") for c in col_name
        ]
        col_name = ["date" if c == "Date" else c for c in col_name]

        td_path = page.xpath("//tr//td")
        # NOTE(review): the stop bound of len - 7 leaves the final cells
        # unprocessed; presumably they belong to a trailing non-data row -
        # confirm against a live response.
        rows = [
            get_value(td_path[i : i + 7])
            for i in range(0, len(td_path) - 7, 7)
        ]
        # DataFrame.append was removed in pandas 2.0; concatenate once.
        data = pd.concat(rows) if rows else pd.DataFrame()

        if len(data) > 0:
            data.columns = col_name
            data["name"] = data_name
            data = data.sort_values("date")
            data.index = range(len(data))

        return data
|
FinMind/crawler/demo.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
from FinMind.crawler import GovernmentBondsCrawler, CommoditiesCrawler
|
|
4
|
+
|
|
5
|
+
# -------------------------------------------------------------------
# Commodities demo: performs live HTTP requests when this module runs.
commodities_crawler = CommoditiesCrawler()
# get futures list
loop_list = commodities_crawler.create_loop_list()

# get one futures data
commodities_df = commodities_crawler.crawler(loop_list[0])

# or get all futures data
# (DataFrame.append was removed in pandas 2.0, so collect and concat once)
commodities_frames = []
for loop in loop_list:
    print(loop)
    value = commodities_crawler.crawler(loop)
    if len(value) > 0:
        commodities_frames.append(value)
commodities_df = (
    pd.concat(commodities_frames) if commodities_frames else pd.DataFrame()
)
# -------------------------------------------------------------------
# Government bonds demo.
gb_crawler = GovernmentBondsCrawler()
# get bond list
loop_list = gb_crawler.create_loop_list()

# get one bond data
gd_df = gb_crawler.crawler(loop_list[0])

# or get all bond data
gd_frames = []
for loop in loop_list:
    print(loop)
    value = gb_crawler.crawler(loop)
    if len(value) > 0:
        gd_frames.append(value)
gd_df = pd.concat(gd_frames) if gd_frames else pd.DataFrame()
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"""
|
|
2
|
+
政府債券
|
|
3
|
+
G8-俄羅斯、美國、加拿大、英國、法國、德國、義大利及日本
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import datetime
|
|
7
|
+
import os
|
|
8
|
+
import re
|
|
9
|
+
import sys
|
|
10
|
+
|
|
11
|
+
import pandas as pd
|
|
12
|
+
import requests
|
|
13
|
+
from lxml import etree
|
|
14
|
+
|
|
15
|
+
from FinMind.crawler.base import BaseCrawler, USER_AGENT
|
|
16
|
+
|
|
17
|
+
# Absolute path of this file with its last two "/"-separated components
# (file name and containing directory) removed; appended to the module
# search path at import time.
PATH = "/".join(os.path.abspath(__file__).split("/")[:-2])
sys.path.append(PATH)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class GovernmentBondsCrawler(BaseCrawler):
    """Crawler for daily government bond history on www.investing.com."""

    @staticmethod
    def create_loop_list():
        """Collect ``[data_id, data_name]`` pairs for the selected countries.

        The rates-bonds index page is scanned for per-country pages; each
        selected country's page is then scanned for ``<span>`` elements
        carrying ``data-id`` / ``data-name`` attributes.

        Returns:
            List of two-element lists of strings, e.g.
            ``['23681', 'Germany 3 Month']``.
        """

        def get_data_id_name(url):
            """Return parallel lists of data ids and names found at ``url``."""
            headers = {
                "Accept": "*/*",
                "Accept-Encoding": "gzip, deflate, br",
                "Accept-Language": "zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7",
                "Connection": "keep-alive",
                "Host": "www.investing.com",
                "Referer": url,
                "User-Agent": USER_AGENT,
                "X-Requested-With": "XMLHttpRequest",
            }

            res = requests.get(url, verify=True, headers=headers)
            tem_data_id = re.findall('data-id="[0-9]+"', res.text)
            # Keep the quoted id ('"123"') so it can be dropped straight
            # into the XPath predicate below.
            tem_data_id = [di.replace("data-id=", "") for di in tem_data_id]
            page = etree.HTML(res.text)
            _data_id = []
            _data_name = []
            for di in tem_data_id:
                tem = page.xpath("//span[@data-id={}]".format(di))
                if len(tem) > 0:
                    _data_id.append(tem[0].attrib["data-id"])
                    _data_name.append(tem[0].attrib["data-name"])

            return _data_id, _data_name

        def get_country_url():
            """Return the bond page URLs of the selected countries."""
            index_url = "https://www.investing.com/rates-bonds/"
            headers = {
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
                "Accept-Encoding": "gzip, deflate, br",
                "Accept-Language": "zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7",
                "Cache-Control": "max-age=0",
                "Connection": "keep-alive",
                "Host": "www.investing.com",
                "Upgrade-Insecure-Requests": "1",
                "User-Agent": USER_AGENT,
            }
            res = requests.get(index_url, verify=True, headers=headers)

            data_country_id = re.findall('data-country-id="[0-9]+"', res.text)
            data_country_id = [
                dci.replace("data-country-id=", "") for dci in data_country_id
            ]
            page = etree.HTML(res.text)
            options = [
                page.xpath("//option[@data-country-id={}]".format(dci))[0]
                for dci in data_country_id
            ]

            url = [
                "https://www.investing.com" + option.attrib["value"]
                for option in options
            ]
            # G8 and china
            select = [
                "canada",
                "china",
                "france",
                "germany",
                "japan",
                "russia",
                "uk",
                "usa",
                "italy",
            ]
            countries_url = []
            for country_page in url:
                # Reduce the URL to its bare country slug before matching.
                tem = country_page.replace(
                    "https://www.investing.com/rates-bonds/", ""
                )
                tem = tem.replace("-government-bonds", "")
                if tem in select:
                    countries_url.append(country_page)
            return countries_url

        # main
        country_url = get_country_url()
        loop_list = []
        for curl in country_url:
            print(curl)
            data_id, data_name = get_data_id_name(curl)
            loop_list.extend(
                [one_id, one_name]
                for one_id, one_name in zip(data_id, data_name)
            )

        return loop_list

    @staticmethod
    def get_end_date():
        """Return yesterday's date formatted as ``MM/DD/YYYY``."""
        end_date = datetime.datetime.now().date() + datetime.timedelta(-1)
        return end_date.strftime("%m/%d/%Y")

    def crawler(self, loop):
        """Download and tabulate the daily history of one bond.

        Args:
            loop: ``[data_id, data_name]`` pair as produced by
                ``create_loop_list``, e.g. ``['23681', 'Germany 3 Month']``.

        Returns:
            A pandas DataFrame sorted by ``date`` with a fresh 0..n-1 index,
            columns named after the response's table headers plus ``name``;
            an empty DataFrame when no rows were parsed.
        """

        def get_value(template):
            """Turn the six ``<td>`` cells of one row into a 1-row frame.

            Returns an empty DataFrame when none of the four price cells
            has text.
            """
            # The first cell carries the date in epoch seconds.
            date = int(template[0].attrib["data-real-value"])
            date = int(date / 60 / 60 / 24)
            date = str(
                datetime.date(1970, 1, 1) + datetime.timedelta(days=date)
            )
            v = [
                float(template[template_index].text)
                for template_index in range(1, 5)
                if template[template_index].text is not None
            ]
            if len(v) == 0:
                return pd.DataFrame()
            _price, _open, _high, _low = v
            # The last cell is a percentage string; store it as a fraction.
            change = (
                float(template[5].text.replace("%", "").replace(",", "")) / 100
            )

            return pd.DataFrame([date, _price, _open, _high, _low, change]).T

        cid, data_name = loop
        header = data_name + " Bond Yield Historical data"
        st_date, end_date = (
            "01/01/1970",
            self.get_end_date(),
        )
        bonds_url = "https://www.investing.com/instruments/HistoricalDataAjax"

        form_data = {
            "curr_id": cid,
            "header": header,
            "st_date": st_date,
            "end_date": end_date,
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data",
        }

        headers = {
            "Accept": "text/plain, */*; q=0.01",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7",
            "Connection": "keep-alive",
            "Content-Length": "192",
            "Content-Type": "application/x-www-form-urlencoded",
            "Host": "www.investing.com",
            "Origin": "https://www.investing.com",
            "Referer": "https://www.investing.com/rates-bonds/france-1-month-bond-yield-historical-data",
            "User-Agent": USER_AGENT,
            "X-Requested-With": "XMLHttpRequest",
        }
        print("requests post")
        res = requests.post(
            bonds_url, verify=True, headers=headers, data=form_data
        )
        print("data clean")
        page = etree.HTML(res.text)
        tr_path = page.xpath("//tr")

        col_name = [col.text for col in tr_path[0].xpath("//th")]
        col_name = [c.replace(" %", "Percent") for c in col_name]
        col_name = ["date" if c == "Date" else c for c in col_name]

        td_path = page.xpath("//tr//td")
        # NOTE(review): the stop bound of len - 6 leaves the final cells
        # unprocessed; presumably they belong to a trailing non-data row -
        # confirm against a live response.
        rows = []
        for i in range(0, len(td_path) - 6, 6):
            value = get_value(td_path[i : i + 6])
            if len(value) > 0:
                rows.append(value)
        # DataFrame.append was removed in pandas 2.0; concatenate once.
        data = pd.concat(rows) if rows else pd.DataFrame()

        if len(data) > 0:
            data.columns = col_name
            data["name"] = "{}".format(data_name)
            data = data.sort_values("date")
            data.index = range(len(data))

        return data
|
FinMind/data/__init__.py
ADDED