finmind 1.9.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- finmind-1.9.12/FinMind/__init__.py +2 -0
- finmind-1.9.12/FinMind/_version.py +6 -0
- finmind-1.9.12/FinMind/crawler/__init__.py +2 -0
- finmind-1.9.12/FinMind/crawler/base.py +165 -0
- finmind-1.9.12/FinMind/crawler/commodities.py +153 -0
- finmind-1.9.12/FinMind/crawler/demo.py +32 -0
- finmind-1.9.12/FinMind/crawler/government_bonds.py +207 -0
- finmind-1.9.12/FinMind/data/__init__.py +5 -0
- finmind-1.9.12/FinMind/data/data_loader.py +2971 -0
- finmind-1.9.12/FinMind/data/data_subscriber.py +116 -0
- finmind-1.9.12/FinMind/data/finmind_api.py +425 -0
- finmind-1.9.12/FinMind/indicators/__init__.py +29 -0
- finmind-1.9.12/FinMind/indicators/bias.py +30 -0
- finmind-1.9.12/FinMind/indicators/continue_holding.py +14 -0
- finmind-1.9.12/FinMind/indicators/institutional_investors_follower.py +76 -0
- finmind-1.9.12/FinMind/indicators/institutional_investors_over_buy.py +56 -0
- finmind-1.9.12/FinMind/indicators/kd.py +34 -0
- finmind-1.9.12/FinMind/indicators/kd_crossover.py +62 -0
- finmind-1.9.12/FinMind/indicators/ma_cross_orver.py +69 -0
- finmind-1.9.12/FinMind/indicators/short_sale_margin_purchase_ratio.py +51 -0
- finmind-1.9.12/FinMind/plotting/__init__.py +4 -0
- finmind-1.9.12/FinMind/plotting/bar.py +65 -0
- finmind-1.9.12/FinMind/plotting/kline.py +402 -0
- finmind-1.9.12/FinMind/plotting/line.py +64 -0
- finmind-1.9.12/FinMind/plotting/pie.py +53 -0
- finmind-1.9.12/FinMind/schema/__init__.py +6 -0
- finmind-1.9.12/FinMind/schema/data.py +170 -0
- finmind-1.9.12/FinMind/schema/indicators.py +94 -0
- finmind-1.9.12/FinMind/schema/info.py +40 -0
- finmind-1.9.12/FinMind/schema/plot.py +56 -0
- finmind-1.9.12/FinMind/schema/rule.py +7 -0
- finmind-1.9.12/FinMind/strategies/__init__.py +29 -0
- finmind-1.9.12/FinMind/strategies/base.py +791 -0
- finmind-1.9.12/FinMind/strategies/bias.py +39 -0
- finmind-1.9.12/FinMind/strategies/continue_holding.py +22 -0
- finmind-1.9.12/FinMind/strategies/institutional_investors_follower.py +85 -0
- finmind-1.9.12/FinMind/strategies/kd.py +32 -0
- finmind-1.9.12/FinMind/strategies/kd_crossover.py +26 -0
- finmind-1.9.12/FinMind/strategies/ma_crossover.py +35 -0
- finmind-1.9.12/FinMind/strategies/macd_crossover.py +46 -0
- finmind-1.9.12/FinMind/strategies/max_min_period_bias.py +62 -0
- finmind-1.9.12/FinMind/strategies/naive_kd.py +39 -0
- finmind-1.9.12/FinMind/strategies/short_sale_margin_purchase_ratio.py +111 -0
- finmind-1.9.12/FinMind/strategies/utils.py +50 -0
- finmind-1.9.12/FinMind/templates/post.html +27 -0
- finmind-1.9.12/FinMind/utility/__init__.py +0 -0
- finmind-1.9.12/FinMind/utility/request.py +156 -0
- finmind-1.9.12/FinMind/utility/rule.py +20 -0
- finmind-1.9.12/LICENSE +201 -0
- finmind-1.9.12/PKG-INFO +392 -0
- finmind-1.9.12/README.md +157 -0
- finmind-1.9.12/finmind.egg-info/PKG-INFO +392 -0
- finmind-1.9.12/finmind.egg-info/SOURCES.txt +57 -0
- finmind-1.9.12/finmind.egg-info/dependency_links.txt +1 -0
- finmind-1.9.12/finmind.egg-info/requires.txt +19 -0
- finmind-1.9.12/finmind.egg-info/top_level.txt +4 -0
- finmind-1.9.12/pyproject.toml +58 -0
- finmind-1.9.12/setup.cfg +4 -0
- finmind-1.9.12/setup.py +5 -0
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import time
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import requests
|
|
6
|
+
|
|
7
|
+
# Browser-like User-Agent string sent with the crawler HTTP requests.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/68.0.3440.84 Safari/537.36"
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def requests_get(url, header):
    """Issue a best-effort HTTP GET and return the response.

    The request uses a 10 second timeout and has TLS certificate
    verification turned off (``verify=False``).

    Args:
        url: Address to fetch.
        header: Mapping of HTTP headers to send.

    Returns:
        The ``requests`` response on success, or ``None`` when the request
        raised any exception.  When the error text mentions
        "Max retries exceeded" or "Read timed out" the call sleeps for 60
        seconds before returning ``None``.
    """
    try:
        return requests.get(url, headers=header, timeout=10, verify=False)
    except Exception as e:
        message = str(e)
        # Back off for a minute on connection exhaustion / read timeouts.
        if "Max retries exceeded" in message or "Read timed out" in message:
            time.sleep(60)
    return None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def requests_post(url, header, form_data):
    """Issue an HTTP POST with form data and return the response.

    The request uses a 10 second timeout and has TLS certificate
    verification turned off (``verify=False``).

    Args:
        url: Address to post to.
        header: Mapping of HTTP headers to send.
        form_data: Payload passed to ``requests.post`` as ``data``.

    Returns:
        The ``requests`` response.

    Raises:
        Exception: Whatever ``requests.post`` raised, re-raised unchanged.
            When the error text mentions "Max retries exceeded" or
            "Read timed out" the call sleeps for 60 seconds first.
    """
    try:
        return requests.post(
            url, data=form_data, headers=header, timeout=10, verify=False
        )
    except Exception as e:
        message = str(e)
        if "Max retries exceeded" in message or "Read timed out" in message:
            time.sleep(60)
        # NOTE(review): the re-raise is assumed to be unconditional (at the
        # ``except`` level, not inside the ``if``) - confirm against the
        # packaged file.
        raise
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_time():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    now = time.localtime(time.time())
    return time.strftime("%Y-%m-%d %H:%M:%S", now)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class BaseCrawler:
    """Date/time conversion and data-cleaning helpers shared by the crawlers.

    All timestamps are counted from 1970-01-01 and handled as naive values;
    no time-zone conversion is applied anywhere in this class.
    """

    @staticmethod
    def date2days(date):
        """Convert a ``"YYYY-MM-DD"`` string to milliseconds since 1970-01-01.

        Example: ``"1970-01-02"`` -> ``86400000``.
        """
        date = datetime.datetime.strptime(date, "%Y-%m-%d").date()
        days = (date - datetime.date(1970, 1, 1)).days
        return days * 60 * 60 * 24 * 1000

    @staticmethod
    def days2date(day):
        """Convert milliseconds since 1970-01-01 to a ``datetime.date``.

        The time-of-day part is discarded (the day count is truncated).
        Example: ``631497600000`` -> ``datetime.date(1990, 1, 5)``.
        """
        day = int(day)
        day = int(day / 1000 / 60 / 60 / 24)
        return datetime.date(1970, 1, 1) + datetime.timedelta(days=day)

    def _date_and_day_seconds(self, ms):
        """Split a millisecond timestamp into (date string, seconds into day)."""
        ms = int(ms)
        date = str(self.days2date(ms))
        days = int(ms / 1000 / 60 / 60 / 24)
        return date, ms / 1000 - days * 60 * 60 * 24

    def millisecond2date(self, ms):
        """Format a millisecond timestamp as ``"YYYY-MM-DD HH:MM:00"``.

        One minute is subtracted from the minute field (``date2millisecond``
        applies the opposite one-minute shift) and the seconds field is
        always ``00``.  Negative hour/minute values are rendered as ``00``.

        Example: ``104820000`` -> ``"1970-01-02 05:06:00"``.
        """
        date, second = self._date_and_day_seconds(ms)
        hour = int(second / 60 / 60)
        minute = int((second - hour * 60 * 60) / 60) - 1
        return "{} {:02d}:{:02d}:00".format(date, max(hour, 0), max(minute, 0))

    def millisecond2date2(self, ms):
        """Format a millisecond timestamp as ``"YYYY-MM-DD HH:MM:SS"``.

        Unlike ``millisecond2date`` no minute is subtracted and the seconds
        are kept.  Negative hour/minute values are rendered as ``00``.

        Example: ``104829000`` -> ``"1970-01-02 05:07:09"``.
        """
        date, second = self._date_and_day_seconds(ms)
        hour = int(second / 60 / 60)
        minute = int((second - hour * 60 * 60) / 60)
        second = int(second - hour * 60 * 60 - minute * 60)

        # Seconds keep the original padding rule: a "0" prefix below ten.
        if second < 10:
            second = "0{}".format(second)
        return "{} {:02d}:{:02d}:{}".format(
            date, max(hour, 0), max(minute, 0), second
        )

    @staticmethod
    def date2millisecond(date):
        """Convert ``"YYYY-MM-DD HH:MM:SS"`` to seconds since 1970-01-01.

        Despite the name, the result is in *seconds*, and one minute is
        added to the parsed time before converting.

        Example: ``"1970-01-01 00:00:00"`` -> ``60``.
        """
        moment = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
        moment = moment + datetime.timedelta(minutes=1)
        elapsed = moment - datetime.datetime(1970, 1, 1, 0, 0, 0)
        return elapsed.days * 24 * 60 * 60 + elapsed.seconds

    @staticmethod
    def create_date(start, today=False):
        """List the dates after ``start`` up to yesterday (or today).

        Args:
            start: ``"YYYY-MM-DD"`` string; the first returned date is the
                day after it.
            today: When true, today's date is included as the last element.

        Returns:
            A list of ``"YYYY-MM-DD"`` strings, empty when ``start`` is not
            in the past.
        """
        start = datetime.datetime.strptime(
            start, "%Y-%m-%d"
        ).date() + datetime.timedelta(days=1)
        end = datetime.date.today()

        day_len = (end - start).days
        if today:
            day_len += 1
        return [
            str(start + datetime.timedelta(days=offset))
            for offset in range(day_len)
        ]

    @staticmethod
    def remove_outlier(data, var_name):
        """Drop rows whose ``var_name`` value is five times the mean or more.

        Args:
            data: Table indexable by column name and by a boolean row mask
                (presumably a pandas DataFrame - confirm against callers).
            var_name: Name of the column to inspect.

        Returns:
            ``data`` unchanged when the column is empty or its standard
            deviation is below 1; otherwise only the rows with
            ``-5*|mean| < value < 5*|mean|``.
        """
        value = list(data[var_name])
        if not value:
            return data

        mean = np.mean(value, axis=0)
        sd = np.std(value, axis=0)
        if sd < 1:
            return data

        # Use |mean| so the accepted interval is not empty for a negative
        # mean (5*mean > x > -5*mean can never hold when mean < 0).
        bound = 5 * abs(mean)
        keep = [bool(bound > x > -bound) for x in value]
        return data[keep]
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def change_chinese_date_us(d):
    """Convert a ``"Y/M/D"`` string whose year is offset by 1911 to a date.

    The year field has 1911 added to it before the ``datetime.date`` is
    built, e.g. ``"108/01/02"`` -> ``datetime.date(2019, 1, 2)``.
    """
    year, month, day = map(int, d.split("/"))
    return datetime.date(year + 1911, month, day)
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
import requests
|
|
8
|
+
from lxml import etree
|
|
9
|
+
|
|
10
|
+
from FinMind.crawler.base import BaseCrawler, USER_AGENT
|
|
11
|
+
|
|
12
|
+
# Directory two levels above this file, appended to the import search path.
# os.path.dirname is used instead of splitting on "/" so the result is also
# correct on platforms whose path separator is not "/".
PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(PATH)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class CommoditiesCrawler(BaseCrawler):
    """Crawler for daily commodity price history from www.investing.com."""

    def __init__(self):
        super(CommoditiesCrawler, self).__init__()

    @staticmethod
    def create_loop_list():
        """Collect the ``[id, name]`` pairs of every listed commodity.

        Each commodity category page is fetched and scanned for
        ``data-name="..." data-id="..."`` attribute pairs.

        Returns:
            A list of ``[futures_id, data_name]`` lists (both strings), in
            page order, suitable as the ``loop`` argument of ``crawler``.
        """
        kind_list = ["meats", "grains", "energies", "softs", "metals"]
        # The headers do not depend on the category, so build them once.
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Host": "www.investing.com",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": USER_AGENT,
        }
        loop_list = []
        for kind in kind_list:
            print(kind)
            index_url = "https://www.investing.com/commodities/" + kind
            res = requests.get(index_url, verify=True, headers=headers)

            matches = re.findall(
                ' data-name="([A-Za-z ]+)" data-id="([0-9]+)" ', res.text
            )
            loop_list.extend(
                [futures_id, data_name] for data_name, futures_id in matches
            )
        return loop_list

    @staticmethod
    def get_end_date():
        """Return yesterday's date formatted as ``MM/DD/YYYY``."""
        end_date = datetime.datetime.now().date() + datetime.timedelta(-1)
        return end_date.strftime("%m/%d/%Y")

    @staticmethod
    def _row_to_frame(template):
        """Turn the seven ``<td>`` cells of one table row into a 1-row frame.

        Cell 0 carries the date as seconds since 1970-01-01, cells 1-5 the
        price, open, high, low and volume (all read from the
        ``data-real-value`` attribute), and cell 6 the change as a
        percentage text.
        """
        seconds = int(template[0].attrib["data-real-value"])
        days = int(seconds / 60 / 60 / 24)
        date = str(datetime.date(1970, 1, 1) + datetime.timedelta(days=days))

        _price, _open, _high, _low, _vol = [
            float(cell.attrib["data-real-value"].replace(",", ""))
            for cell in template[1:6]
        ]
        change = (
            float(template[6].text.replace("%", "").replace(",", "")) / 100
        )

        return pd.DataFrame(
            [date, _price, _open, _high, _low, _vol, change]
        ).T

    def crawler(self, loop):  # loop = ['49769', 'Brent Oil Futures']
        """Download the full daily history of one commodity.

        Args:
            loop: ``[futures_id, data_name]`` pair as produced by
                ``create_loop_list``.

        Returns:
            A DataFrame sorted by ``date`` with a fresh 0..n-1 index, whose
            columns are named after the table headers plus a ``name`` column
            holding ``data_name``; an empty DataFrame when no rows were
            parsed.
        """
        futures_id, data_name = loop
        header = data_name + " Historical data"
        st_date, end_date = (
            "01/01/1970",
            self.get_end_date(),
        )

        bonds_url = "https://www.investing.com/instruments/HistoricalDataAjax"
        form_data = {
            "curr_id": futures_id,
            "header": header,
            "st_date": st_date,
            "end_date": end_date,
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data",
        }

        # NOTE(review): Content-Length and Referer are hard-coded values and
        # do not vary with the payload or commodity - confirm this is
        # intended.
        headers = {
            "Accept": "text/plain, */*; q=0.01",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7",
            "Connection": "keep-alive",
            "Content-Length": "183",
            "Content-Type": "application/x-www-form-urlencoded",
            "Host": "www.investing.com",
            "Origin": "https://www.investing.com",
            "Referer": "https://www.investing.com/commodities/brent-oil-historical-data",
            "User-Agent": USER_AGENT,
            "X-Requested-With": "XMLHttpRequest",
        }
        print("requests post")
        res = requests.post(
            bonds_url, verify=True, headers=headers, data=form_data
        )
        print("data clean")
        page = etree.HTML(res.text)

        col_name = [
            c.text.replace(" %", "Percent").replace(".", "")
            for c in page.xpath("//tr//th")
        ]
        col_name = ["date" if c == "Date" else c for c in col_name]

        td_path = page.xpath("//tr//td")
        # The stop bound leaves the final group of cells unparsed
        # (presumably trailing non-data cells - confirm).
        frames = [
            self._row_to_frame(td_path[i : i + 7])
            for i in range(0, len(td_path) - 7, 7)
        ]
        # DataFrame.append no longer exists in pandas 2.x; concatenate once.
        data = pd.concat(frames) if frames else pd.DataFrame()

        if len(data) > 0:
            data.columns = col_name
            data["name"] = data_name
            data = data.sort_values("date")
            data.index = range(len(data))

        return data
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
from FinMind.crawler import GovernmentBondsCrawler, CommoditiesCrawler
|
|
4
|
+
|
|
5
|
+
# -------------------------------------------------------------------
|
|
6
|
+
# Demo: download commodity and government-bond history with the crawlers.
commodities_crawler = CommoditiesCrawler()
# get futures list
loop_list = commodities_crawler.create_loop_list()

# get one futures data
commodities_df = commodities_crawler.crawler(loop_list[0])

# or get all futures data
# (frames are collected and concatenated once because DataFrame.append
# no longer exists in pandas 2.x)
_frames = []
for loop in loop_list:
    print(loop)
    value = commodities_crawler.crawler(loop)
    _frames.append(value)
commodities_df = pd.concat(_frames) if _frames else pd.DataFrame()
# -------------------------------------------------------------------
gb_crawler = GovernmentBondsCrawler()
# get bonds list
loop_list = gb_crawler.create_loop_list()

# get one bond data
gd_df = gb_crawler.crawler(loop_list[0])

# or get all bonds data
_frames = []
for loop in loop_list:
    print(loop)
    value = gb_crawler.crawler(loop)
    _frames.append(value)
gd_df = pd.concat(_frames) if _frames else pd.DataFrame()
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"""
|
|
2
|
+
政府債券
|
|
3
|
+
G8-俄羅斯、美國、加拿大、英國、法國、德國、義大利及日本
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import datetime
|
|
7
|
+
import os
|
|
8
|
+
import re
|
|
9
|
+
import sys
|
|
10
|
+
|
|
11
|
+
import pandas as pd
|
|
12
|
+
import requests
|
|
13
|
+
from lxml import etree
|
|
14
|
+
|
|
15
|
+
from FinMind.crawler.base import BaseCrawler, USER_AGENT
|
|
16
|
+
|
|
17
|
+
# Directory two levels above this file, appended to the import search path.
# os.path.dirname is used instead of splitting on "/" so the result is also
# correct on platforms whose path separator is not "/".
PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(PATH)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class GovernmentBondsCrawler(BaseCrawler):
    """Crawler for government bond yield history from www.investing.com."""

    @staticmethod
    def create_loop_list():
        """Collect the ``[id, name]`` pairs of the selected countries' bonds.

        The bonds index page is read for the per-country pages, the
        countries named in ``select`` are kept, and each country page is
        scanned for ``<span>`` elements carrying ``data-id`` /
        ``data-name`` attributes.

        Returns:
            A list of ``[data_id, data_name]`` lists, suitable as the
            ``loop`` argument of ``crawler``.
        """

        def get_data_id_name(url):
            """Return the (ids, names) lists found on one country page."""
            headers = {
                "Accept": "*/*",
                "Accept-Encoding": "gzip, deflate, br",
                "Accept-Language": "zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7",
                "Connection": "keep-alive",
                "Host": "www.investing.com",
                "Referer": url,
                "User-Agent": USER_AGENT,
                "X-Requested-With": "XMLHttpRequest",
            }

            res = requests.get(url, verify=True, headers=headers)
            # Each entry keeps its surrounding double quotes, so it can be
            # dropped straight into the XPath predicate below.
            quoted_ids = [
                di.replace("data-id=", "")
                for di in re.findall('data-id="[0-9]+"', res.text)
            ]
            page = etree.HTML(res.text)
            _data_id = []
            _data_name = []
            for di in quoted_ids:
                spans = page.xpath("//span[@data-id={}]".format(di))
                if len(spans) > 0:
                    _data_id.append(spans[0].attrib["data-id"])
                    _data_name.append(spans[0].attrib["data-name"])

            return _data_id, _data_name

        def get_country_url():
            """Return the bond page URLs of the selected countries."""
            index_url = "https://www.investing.com/rates-bonds/"
            headers = {
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
                "Accept-Encoding": "gzip, deflate, br",
                "Accept-Language": "zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7",
                "Cache-Control": "max-age=0",
                "Connection": "keep-alive",
                "Host": "www.investing.com",
                "Upgrade-Insecure-Requests": "1",
                "User-Agent": USER_AGENT,
            }
            res = requests.get(index_url, verify=True, headers=headers)

            data_country_id = [
                dci.replace("data-country-id=", "")
                for dci in re.findall('data-country-id="[0-9]+"', res.text)
            ]
            page = etree.HTML(res.text)
            options = [
                page.xpath("//option[@data-country-id={}]".format(dci))[0]
                for dci in data_country_id
            ]
            url = [
                "https://www.investing.com" + option.attrib["value"]
                for option in options
            ]
            # G8 and china
            select = [
                "canada",
                "china",
                "france",
                "germany",
                "japan",
                "russia",
                "uk",
                "usa",
                "italy",
            ]
            countries_url = []
            for candidate in url:
                country = candidate.replace(
                    "https://www.investing.com/rates-bonds/", ""
                )
                country = country.replace("-government-bonds", "")
                if country in select:
                    countries_url.append(candidate)
            return countries_url

        # main
        loop_list = []
        for curl in get_country_url():
            print(curl)
            data_id, data_name = get_data_id_name(curl)
            loop_list.extend(
                [bond_id, bond_name]
                for bond_id, bond_name in zip(data_id, data_name)
            )

        return loop_list

    @staticmethod
    def get_end_date():
        """Return yesterday's date formatted as ``MM/DD/YYYY``."""
        end_date = datetime.datetime.now().date() + datetime.timedelta(-1)
        return end_date.strftime("%m/%d/%Y")

    def crawler(self, loop):  # loop = ['23681', 'Germany 3 Month']
        """Download the full daily yield history of one bond.

        Args:
            loop: ``[data_id, data_name]`` pair as produced by
                ``create_loop_list``.

        Returns:
            A DataFrame sorted by ``date`` with a fresh 0..n-1 index, whose
            columns are named after the table headers plus a ``name`` column
            holding ``data_name``; an empty DataFrame when no rows were
            parsed.
        """

        def get_value(template):
            """Turn the six ``<td>`` cells of one row into a 1-row frame.

            Returns an empty DataFrame when any of the four price cells has
            no text, so that incomplete rows are skipped by the caller.
            """
            seconds = int(template[0].attrib["data-real-value"])
            days = int(seconds / 60 / 60 / 24)
            date = str(
                datetime.date(1970, 1, 1) + datetime.timedelta(days=days)
            )
            v = [
                float(cell.text)
                for cell in template[1:5]
                if cell.text is not None
            ]
            # A row missing only some prices would otherwise fail to unpack.
            if len(v) < 4:
                return pd.DataFrame()
            _price, _open, _high, _low = v
            change = (
                float(template[5].text.replace("%", "").replace(",", "")) / 100
            )

            return pd.DataFrame([date, _price, _open, _high, _low, change]).T

        cid, data_name = loop
        header = data_name + " Bond Yield Historical data"
        st_date, end_date = (
            "01/01/1970",
            self.get_end_date(),
        )
        bonds_url = "https://www.investing.com/instruments/HistoricalDataAjax"

        form_data = {
            "curr_id": cid,
            "header": header,
            "st_date": st_date,
            "end_date": end_date,
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data",
        }

        # NOTE(review): Content-Length and Referer are hard-coded values and
        # do not vary with the payload or bond - confirm this is intended.
        headers = {
            "Accept": "text/plain, */*; q=0.01",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7",
            "Connection": "keep-alive",
            "Content-Length": "192",
            "Content-Type": "application/x-www-form-urlencoded",
            "Host": "www.investing.com",
            "Origin": "https://www.investing.com",
            "Referer": "https://www.investing.com/rates-bonds/france-1-month-bond-yield-historical-data",
            "User-Agent": USER_AGENT,
            "X-Requested-With": "XMLHttpRequest",
        }
        print("requests post")
        res = requests.post(
            bonds_url, verify=True, headers=headers, data=form_data
        )
        print("data clean")
        page = etree.HTML(res.text)

        # "//th" is an absolute path, so it is evaluated on the whole
        # document; querying the page directly avoids an IndexError when the
        # response contains no <tr> at all.
        col_name = [col.text for col in page.xpath("//th")]
        col_name = [c.replace(" %", "Percent") for c in col_name]
        col_name = ["date" if c == "Date" else c for c in col_name]

        td_path = page.xpath("//tr//td")
        # The stop bound leaves the final group of cells unparsed
        # (presumably trailing non-data cells - confirm).
        frames = []
        for i in range(0, len(td_path) - 6, 6):
            value = get_value(td_path[i : i + 6])
            if len(value) > 0:
                frames.append(value)
        # DataFrame.append no longer exists in pandas 2.x; concatenate once.
        data = pd.concat(frames) if frames else pd.DataFrame()

        if len(data) > 0:
            data.columns = col_name
            data["name"] = "{}".format(data_name)
            data = data.sort_values("date")
            data.index = range(len(data))

        return data
|