scraper2-hj3415 0.0.3__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scraper2_hj3415/cli.py +35 -11
- scraper2_hj3415/miscrapy/mi/__init__.py +0 -0
- scraper2_hj3415/miscrapy/mi/items.py +7 -0
- scraper2_hj3415/miscrapy/mi/middlewares.py +103 -0
- scraper2_hj3415/miscrapy/mi/pipelines.py +39 -0
- scraper2_hj3415/miscrapy/mi/settings.py +103 -0
- scraper2_hj3415/miscrapy/mi/spiders/__init__.py +4 -0
- scraper2_hj3415/miscrapy/mi/spiders/aud.py +23 -0
- scraper2_hj3415/miscrapy/mi/spiders/chf.py +25 -0
- scraper2_hj3415/miscrapy/mi/spiders/gbond3y.py +24 -0
- scraper2_hj3415/miscrapy/mi/spiders/gold.py +25 -0
- scraper2_hj3415/miscrapy/mi/spiders/kosdaq.py +23 -0
- scraper2_hj3415/miscrapy/mi/spiders/kospi.py +23 -0
- scraper2_hj3415/miscrapy/mi/spiders/mihistory.py +333 -0
- scraper2_hj3415/miscrapy/mi/spiders/silver.py +25 -0
- scraper2_hj3415/miscrapy/mi/spiders/sp500.py +24 -0
- scraper2_hj3415/miscrapy/mi/spiders/usdidx.py +30 -0
- scraper2_hj3415/miscrapy/mi/spiders/usdkrw.py +24 -0
- scraper2_hj3415/miscrapy/mi/spiders/wti.py +25 -0
- scraper2_hj3415/miscrapy/run.py +156 -0
- scraper2_hj3415/miscrapy/scrapy.cfg +11 -0
- scraper2_hj3415/nfscrapy/nfs/items.py +24 -0
- scraper2_hj3415/nfscrapy/nfs/pipelines.py +94 -16
- scraper2_hj3415/nfscrapy/nfs/settings.py +2 -1
- scraper2_hj3415/nfscrapy/nfs/spiders/_c108.py +95 -0
- scraper2_hj3415/nfscrapy/nfs/spiders/c101.py +3 -11
- scraper2_hj3415/nfscrapy/nfs/spiders/c103.py +174 -0
- scraper2_hj3415/nfscrapy/nfs/spiders/c104.py +228 -0
- scraper2_hj3415/nfscrapy/nfs/spiders/c106.py +92 -0
- scraper2_hj3415/nfscrapy/nfs/spiders/common.py +60 -0
- scraper2_hj3415/nfscrapy/run.py +76 -26
- {scraper2_hj3415-0.0.3.dist-info → scraper2_hj3415-0.1.1.dist-info}/METADATA +1 -1
- scraper2_hj3415-0.1.1.dist-info/RECORD +42 -0
- scraper2_hj3415-0.1.1.dist-info/entry_points.txt +2 -0
- scraper2_hj3415-0.0.3.dist-info/RECORD +0 -17
- scraper2_hj3415-0.0.3.dist-info/entry_points.txt +0 -2
- {scraper2_hj3415-0.0.3.dist-info → scraper2_hj3415-0.1.1.dist-info}/WHEEL +0 -0
- {scraper2_hj3415-0.0.3.dist-info → scraper2_hj3415-0.1.1.dist-info}/licenses/LICENSE +0 -0
scraper2_hj3415/miscrapy/mi/spiders/mihistory.py
@@ -0,0 +1,333 @@
+import time
+import scrapy
+from util_hj3415 import utils
+from scrapy.selector import Selector
+from selenium.common.exceptions import NoSuchElementException
+from mi import items
+
+# cmd usage : scrapy crawl mihistory -a year=1
+
+
+WAIT = 1
+
+
+class MIHistory(scrapy.Spider):
+    name = 'mihistory'
+    allowed_domains = ['finance.naver.com']
+
+    def __init__(self, year=1):
+        self.year = int(year)
+        self.driver = utils.get_driver()
+        if self.driver is None:
+            raise
+        # Pages covering roughly one year: kospi, kosdaq -> 42, gbond3y -> 38, s&p -> 27, usdkrw -> 26, wti -> 38, gold -> 38, audchf -> 46
+        self.last_page_kospi_kosdaq = 42 * self.year
+        self.last_page_3bond3y = 38 * self.year
+        self.last_page_sp500 = 27 * self.year
+        self.last_page_usdkrw = 26 * self.year
+        self.last_page_wti = 38 * self.year
+        self.last_page_gold = 38 * self.year
+        self.last_page_silver = 38 * self.year
+        self.last_page_audchf = 46 * self.year
+        self.item_list = []
+        self.aud_dict = {}
+        self.audchf_dict = {}
+
+    def start_requests(self):
+        # reference from https://docs.scrapy.org/en/latest/topics/request-response.html
+        print(f'Parsing Market Index history...{self.year} year..')
+        yield scrapy.Request(
+            url=f'https://finance.naver.com/sise/sise_index_day.nhn?code=KOSPI&page={self.last_page_kospi_kosdaq}',
+            callback=self.parse_kospi,
+            cb_kwargs=dict(page=self.last_page_kospi_kosdaq),
+        )
+
+    def parse_kospi(self, response, page):
+        print(f"Parsing ...kospi {page} page", flush=True)
+        item = items.MIitems()
+        # Scrape KOSPI
+        time.sleep(WAIT)
+        self.logger.info(response.url)
+        for i in [12, 11, 10, 5, 4, 3]:
+            item['title'] = 'kospi'
+            item['date'] = response.xpath(f'/html/body/div/table[1]/tr[{i}]/td[1]/text()').get()
+            item['value'] = response.xpath(f'/html/body/div/table[1]/tr[{i}]/td[2]/text()').get().replace(',', '')
+            self.logger.info(f"date : {item['date']}, value : {item['value']}")
+            yield item
+        if page > 1:
+            yield scrapy.Request(
+                url=f'https://finance.naver.com/sise/sise_index_day.nhn?code=KOSPI&page={page - 1}',
+                callback=self.parse_kospi,
+                cb_kwargs=dict(page=page - 1),
+            )
+        else:
+            yield scrapy.Request(
+                url=f'https://finance.naver.com/sise/sise_index_day.nhn?code=KOSDAQ&page={self.last_page_kospi_kosdaq}',
+                callback=self.parse_kosdaq,
+                cb_kwargs=dict(page=self.last_page_kospi_kosdaq),
+            )
+
+    def parse_kosdaq(self, response, page):
+        print(f"Parsing ...kosdaq {page} page", flush=True)
+        item = items.MIitems()
+        # Scrape KOSDAQ
+        time.sleep(WAIT)
+        self.logger.info(response.url)
+        for i in [12, 11, 10, 5, 4, 3]:
+            item['title'] = 'kosdaq'
+            item['date'] = response.xpath(f'/html/body/div/table[1]/tr[{i}]/td[1]/text()').get()
+            item['value'] = response.xpath(f'/html/body/div/table[1]/tr[{i}]/td[2]/text()').get().replace(',', '')
+            self.logger.info(f"date : {item['date']}, value : {item['value']}")
+            yield item
+        if page > 1:
+            yield scrapy.Request(
+                url=f'https://finance.naver.com/sise/sise_index_day.nhn?code=KOSDAQ&page={page - 1}',
+                callback=self.parse_kosdaq,
+                cb_kwargs=dict(page=page - 1),
+            )
+        else:
+            yield scrapy.Request(
+                url=f'https://finance.naver.com/marketindex/interestDailyQuote.nhn?marketindexCd=IRR_GOVT03Y&page={self.last_page_3bond3y}',
+                callback=self.parse_gbond3y,
+                cb_kwargs=dict(page=self.last_page_3bond3y),
+            )
+
+    def parse_gbond3y(self, response, page):
+        print(f"Parsing ...gbond3y {page} page", flush=True)
+        item = items.MIitems()
+        # Scrape the 3-year treasury bond rate
+        time.sleep(WAIT)
+        self.logger.info(response.url)
+        for i in range(7, 0, -1):
+            item['title'] = 'gbond3y'
+            item['date'] = (response.css(f'body > div > table > tbody > tr:nth-child({i}) > td.date::text')
+                            .extract()[0].replace('\n', '').replace('\t', ''))
+            item['value'] = (response.css(f'body > div > table > tbody > tr:nth-child({i}) > td:nth-child(2)::text')
+                             .extract()[0])
+            self.logger.info(f"date : {item['date']}, value : {item['value']}")
+            yield item
+        if page > 1:
+            yield scrapy.Request(
+                url=f'https://finance.naver.com/marketindex/interestDailyQuote.nhn?marketindexCd=IRR_GOVT03Y&page={page - 1}',
+                callback=self.parse_gbond3y,
+                cb_kwargs=dict(page=page - 1),
+            )
+        else:
+            yield scrapy.Request(
+                url=f'https://finance.naver.com/marketindex/worldDailyQuote.nhn?fdtc=4&marketindexCd=FX_USDAUD',
+                callback=self.parse_aud,
+                cb_kwargs=dict(page=self.last_page_audchf),
+            )
+
+    def parse_aud(self, response, page):
+        print(f"Parsing ...aud {page} page", flush=True)
+        item = items.MIitems()
+        # Scrape AUD
+        time.sleep(WAIT)
+        self.logger.info(response.url)
+        for i in range(7, 0, -1):
+            item['title'] = 'aud'
+            item['date'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[1]/text()')
+                            .get().replace('\t', '').replace('\n', ''))
+            item['value'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[2]/text()')
+                             .get().replace(',', '').replace('\t', '').replace('\n', ''))
+            self.logger.info(f"date : {item['date']}, value : {item['value']}")
+            yield item
+        if page > 1:
+            yield scrapy.Request(
+                url=f'https://finance.naver.com/marketindex/worldDailyQuote.nhn?fdtc=4&marketindexCd=FX_USDAUD&page={page - 1}',
+                callback=self.parse_aud,
+                cb_kwargs=dict(page=page - 1),
+            )
+        else:
+            yield scrapy.Request(
+                url=f'https://finance.naver.com/marketindex/worldDailyQuote.nhn?fdtc=4&marketindexCd=FX_USDCHF',
+                callback=self.parse_chf,
+                cb_kwargs=dict(page=self.last_page_audchf),
+            )
+
+    def parse_chf(self, response, page):
+        print(f"Parsing ...chf {page} page", flush=True)
+        item = items.MIitems()
+        # Scrape CHF
+        time.sleep(WAIT)
+        self.logger.info(response.url)
+        for i in range(7, 0, -1):
+            item['title'] = 'chf'
+            item['date'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[1]/text()')
+                            .get().replace('\t', '').replace('\n', ''))
+            item['value'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[2]/text()')
+                             .get().replace(',', '').replace('\t', '').replace('\n', ''))
+            self.logger.info(f"date : {item['date']}, value : {item['value']}")
+            yield item
+        if page > 1:
+            yield scrapy.Request(
+                url=f'https://finance.naver.com/marketindex/worldDailyQuote.nhn?fdtc=4&marketindexCd=FX_USDCHF&page={page - 1}',
+                callback=self.parse_chf,
+                cb_kwargs=dict(page=page - 1),
+            )
+        else:
+            yield scrapy.Request(
+                url=f'https://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_USDKRW&page={self.last_page_usdkrw}',
+                callback=self.parse_usdkrw,
+                cb_kwargs=dict(page=self.last_page_usdkrw),
+            )
+
+    def parse_usdkrw(self, response, page):
+        print(f"Parsing ...usdkrw {page} page", flush=True)
+        item = items.MIitems()
+        # Scrape the USD/KRW exchange rate
+        time.sleep(WAIT)
+        self.logger.info(response.url)
+        for i in range(10, 0, -1):
+            item['title'] = 'usdkrw'
+            item['date'] = response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[1]/text()').get()
+            item['value'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[2]/text()')
+                             .get().replace(',', ''))
+            self.logger.info(f"date : {item['date']}, value : {item['value']}")
+            yield item
+        if page > 1:
+            yield scrapy.Request(
+                url=f'https://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_USDKRW&page={page - 1}',
+                callback=self.parse_usdkrw,
+                cb_kwargs=dict(page=page - 1),
+            )
+        else:
+            yield scrapy.Request(
+                url=f'https://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=OIL_CL&fdtc=2&page={self.last_page_wti}',
+                callback=self.parse_wti,
+                cb_kwargs=dict(page=self.last_page_wti),
+            )
+
+    def parse_wti(self, response, page):
+        print(f"Parsing ...wti {page} page", flush=True)
+        item = items.MIitems()
+        # Scrape WTI crude oil
+        time.sleep(WAIT)
+        self.logger.info(response.url)
+        for i in range(7, 0, -1):
+            item['title'] = 'wti'
+            item['date'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[1]/text()')
+                            .get().replace('\t', '').replace('\n', ''))
+            item['value'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[2]/text()')
+                             .get().replace(',', '').replace('\t', '').replace('\n', ''))
+            self.logger.info(f"date : {item['date']}, value : {item['value']}")
+            yield item
+        if page > 1:
+            yield scrapy.Request(
+                url=f'https://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=OIL_CL&fdtc=2&page={page - 1}',
+                callback=self.parse_wti,
+                cb_kwargs=dict(page=page - 1),
+            )
+        else:
+            yield scrapy.Request(
+                url=f'https://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=CMDT_GC&fdtc=2&page={self.last_page_gold}',
+                callback=self.parse_gold,
+                cb_kwargs=dict(page=self.last_page_gold),
+            )
+
+    def parse_gold(self, response, page):
+        print(f"Parsing ...gold {page} page", flush=True)
+        item = items.MIitems()
+        # Scrape gold
+        time.sleep(WAIT)
+        self.logger.info(response.url)
+        for i in range(7, 0, -1):
+            item['title'] = 'gold'
+            item['date'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[1]/text()')
+                            .get().replace('\t', '').replace('\n', ''))
+            item['value'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[2]/text()')
+                             .get().replace(',', '').replace('\t', '').replace('\n', ''))
+            self.logger.info(f"date : {item['date']}, value : {item['value']}")
+            yield item
+        if page > 1:
+            yield scrapy.Request(
+                url=f'https://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=CMDT_GC&fdtc=2&page={page - 1}',
+                callback=self.parse_gold,
+                cb_kwargs=dict(page=page - 1),
+            )
+        else:
+            yield scrapy.Request(
+                url=f'https://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=CMDT_SI&fdtc=2&page={self.last_page_silver}',
+                callback=self.parse_silver,
+                cb_kwargs=dict(page=self.last_page_silver),
+            )
+
+    def parse_silver(self, response, page):
+        print(f"Parsing ...silver {page} page", flush=True)
+        item = items.MIitems()
+        # Scrape silver
+        time.sleep(WAIT)
+        self.logger.info(response.url)
+        for i in range(7, 0, -1):
+            item['title'] = 'silver'
+            item['date'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[1]/text()')
+                            .get().replace('\t', '').replace('\n', ''))
+            item['value'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[2]/text()')
+                             .get().replace(',', '').replace('\t', '').replace('\n', ''))
+            self.logger.info(f"date : {item['date']}, value : {item['value']}")
+            yield item
+        if page > 1:
+            yield scrapy.Request(
+                url=f'https://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=CMDT_SI&fdtc=2&page={page - 1}',
+                callback=self.parse_silver,
+                cb_kwargs=dict(page=page - 1),
+            )
+        else:
+            yield scrapy.Request(
+                url=f'https://finance.naver.com/world/sise.nhn?symbol=SPI@SPX',
+                callback=self.parse_sp500,
+                cb_kwargs=dict(page=self.last_page_sp500),
+            )
+
+    def parse_sp500(self, response, page):
+        print(f"Parsing ...sp500 {page} page", flush=True)
+        item = items.MIitems()
+        # Scrape the S&P 500
+        time.sleep(WAIT)
+        self.logger.info(response.url)
+        self.driver.get(response.url)
+        time.sleep(WAIT)
+        next1 = '//*[@id="dayPaging"]/a[11]'  # 'Next' button on the first page
+        next2 = '//*[@id="dayPaging"]/a[12]'  # 'Next' button on pages after the first
+        self.driver.find_element_by_xpath(next1).click()
+        time.sleep(WAIT)
+        '''
+        self.driver.find_element_by_xpath(next2).click()
+        time.sleep(1)
+        self.driver.find_element_by_xpath(next2).click()
+        time.sleep(1)
+        self.driver.find_element_by_xpath(next2).click()
+        time.sleep(1)
+        '''
+        for j in range(page, 0, -1):
+            if j % 10 == 0:
+                prev = '//*[@id="dayPaging"]/a[1]'
+                self.driver.find_element_by_xpath(prev).click()
+                self.logger.info('click prev...')
+                time.sleep(1)
+            link = f'//*[@id="dayLink{j}"]'
+            while True:
+                try:
+                    self.driver.find_element_by_xpath(link).click()
+                    self.logger.info(f'click {j} button..')
+                    break
+                except NoSuchElementException as e:
+                    self.logger.error(f'Error : {e}')
+                    self.driver.find_element_by_xpath(next2).click()
+                    time.sleep(1)
+            time.sleep(1)
+            sel = Selector(text=self.driver.page_source)
+
+            for i in range(10, 0, -1):
+                item['title'] = 'sp500'
+                item['date'] = sel.xpath(f'//*[@id="dayTable"]/tbody/tr[{i}]/td[1]/text()').get()
+                item['value'] = (sel.xpath(f'//*[@id="dayTable"]/tbody/tr[{i}]/td[2]/span/text()')
+                                 .get().replace(',', ''))
+                self.logger.info(f"date : {item['date']}, value : {item['value']}")
+                yield item
+
+    def __del__(self):
+        if self.driver is not None:
+            print('Retrieve chrome driver...')
+            self.driver.quit()
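The spider above pages backwards through each index: every parse_* callback scrapes a handful of rows, then re-requests the same index at page - 1 (carrying the page number in cb_kwargs) until it reaches page 1, at which point it issues the first request for the next index. The in-file comment gives the invocation (scrapy crawl mihistory -a year=1). A minimal sketch of that hand-rolled pagination pattern, reduced to a single index; the class below is illustrative only and not part of the package:

import scrapy


class PagedIndexSpider(scrapy.Spider):
    """Illustrative only: the backwards-pagination pattern used by MIHistory."""
    name = 'paged_index_demo'
    allowed_domains = ['finance.naver.com']

    def start_requests(self):
        last_page = 42  # roughly one year of KOSPI daily pages, per the comment in MIHistory
        yield scrapy.Request(
            url=f'https://finance.naver.com/sise/sise_index_day.nhn?code=KOSPI&page={last_page}',
            callback=self.parse_page,
            cb_kwargs=dict(page=last_page),  # the page number travels with the request
        )

    def parse_page(self, response, page):
        # scrape the rows of interest on this page (same selectors as MIHistory)
        for i in [12, 11, 10, 5, 4, 3]:
            yield {
                'title': 'kospi',
                'date': response.xpath(f'/html/body/div/table[1]/tr[{i}]/td[1]/text()').get(),
                'value': response.xpath(f'/html/body/div/table[1]/tr[{i}]/td[2]/text()').get(),
            }
        if page > 1:
            # page numbers count backwards from today, so page - 1 moves forward in time
            yield scrapy.Request(
                url=f'https://finance.naver.com/sise/sise_index_day.nhn?code=KOSPI&page={page - 1}',
                callback=self.parse_page,
                cb_kwargs=dict(page=page - 1),
            )
        # at page == 1 MIHistory hands off to the next market index instead of stopping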
scraper2_hj3415/miscrapy/mi/spiders/silver.py
@@ -0,0 +1,25 @@
+import scrapy
+from mi import items
+
+# cmd usage : scrapy crawl silver
+
+
+class SilverSpider(scrapy.Spider):
+    name = 'silver'
+    allowed_domains = ['finance.naver.com']
+    start_urls = ['https://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=CMDT_SI&fdtc=2']
+
+    def parse(self, response):
+        print(f"Parsing ...'{self.name}' page", flush=True)
+        item = items.MIitems()
+        # Scrape silver
+        self.logger.info(response.url)
+        # Scrape the last three days of data.
+        for r in range(3, 0, -1):
+            item['title'] = self.name
+            item['date'] = (response.xpath(f'/html/body/div/table/tbody/tr[{r}]/td[1]/text()')
+                            .get().replace('\t', '').replace('\n', ''))
+            item['value'] = (response.xpath(f'/html/body/div/table/tbody/tr[{r}]/td[2]/text()')
+                             .get().replace(',', '').replace('\t', '').replace('\n', ''))
+            self.logger.info(f"date : {item['date']}, value : {item['value']}")
+            yield item
scraper2_hj3415/miscrapy/mi/spiders/sp500.py
@@ -0,0 +1,24 @@
+import scrapy
+from mi import items
+
+# cmd usage : scrapy crawl sp500
+
+
+class Sp500Spider(scrapy.Spider):
+    name = 'sp500'
+    allowed_domains = ['finance.naver.com']
+    start_urls = ['https://finance.naver.com/world/sise.nhn?symbol=SPI@SPX']
+
+    def parse(self, response):
+        print(f"Parsing ...'{self.name}' page", flush=True)
+        item = items.MIitems()
+        # Scrape the S&P 500
+        self.logger.info(response.url)
+        # Scrape the last three days of data.
+        for r in range(3, 0, -1):
+            item['title'] = self.name
+            item['date'] = response.xpath(f'//*[@id="dayTable"]/tbody/tr[{r}]/td[1]/text()').get()
+            item['value'] = (response.xpath(f'//*[@id="dayTable"]/tbody/tr[{r}]/td[2]/span/text()')
+                             .get().replace(',', ''))
+            self.logger.info(f"date : {item['date']}, value : {item['value']}")
+            yield item
scraper2_hj3415/miscrapy/mi/spiders/usdidx.py
@@ -0,0 +1,30 @@
+import scrapy
+from mi import items
+
+# cmd usage : scrapy crawl usdidx
+
+
+class UsdidxSpider(scrapy.Spider):
+    name = 'usdidx'
+    allowed_domains = ['finance.naver.com']
+    start_urls = ['https://finance.naver.com/marketindex/worldExchangeDetail.nhn?marketindexCd=FX_USDX']
+
+    def parse(self, response):
+        print(f"Parsing ...'{self.name}' page", flush=True)
+        item = items.MIitems()
+        # Scrape the dollar index
+        self.logger.info(response.url)
+
+        # Scrape the most recent data.
+        # date - //*[@id="content"]/div[1]/div[2]/span[1]
+        # value - //*[@id="content"]/div[1]/div[1]/p[1]/em
+        value = []
+        for span in response.xpath(f'//*[@id="content"]/div[1]/div[1]/p[1]/em/span'):
+            value.append(span.xpath('text()').get())
+
+        item['title'] = self.name
+        item['date'] = response.xpath('//*[@id="content"]/div[1]/div[2]/span[1]/text()').get()
+        item['value'] = ''.join(value)
+
+        self.logger.info(f"date : {item['date']}, value : {item['value']}")
+        yield item
scraper2_hj3415/miscrapy/mi/spiders/usdkrw.py
@@ -0,0 +1,24 @@
+import scrapy
+from mi import items
+
+# cmd usage : scrapy crawl usdkrw
+
+
+class UsdkrwSpider(scrapy.Spider):
+    name = 'usdkrw'
+    allowed_domains = ['finance.naver.com']
+    start_urls = ['https://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_USDKRW']
+
+    def parse(self, response):
+        print(f"Parsing ...'{self.name}' page", flush=True)
+        item = items.MIitems()
+        # Scrape the USD/KRW exchange rate
+        self.logger.info(response.url)
+        # Scrape the last three days of data.
+        for r in range(3, 0, -1):
+            item['title'] = self.name
+            item['date'] = response.xpath(f'/html/body/div/table/tbody/tr[{r}]/td[1]/text()').get()
+            item['value'] = (response.xpath(f'/html/body/div/table/tbody/tr[{r}]/td[2]/text()')
+                             .get().replace(',', ''))
+            self.logger.info(f"date : {item['date']}, value : {item['value']}")
+            yield item
scraper2_hj3415/miscrapy/mi/spiders/wti.py
@@ -0,0 +1,25 @@
+import scrapy
+from mi import items
+
+# cmd usage : scrapy crawl wti
+
+
+class WtiSpider(scrapy.Spider):
+    name = 'wti'
+    allowed_domains = ['finance.naver.com']
+    start_urls = ['https://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=OIL_CL&fdtc=2']
+
+    def parse(self, response):
+        print(f"Parsing ...'{self.name}' page", flush=True)
+        item = items.MIitems()
+        # Scrape WTI crude oil
+        self.logger.info(response.url)
+        # Scrape the last three days of data.
+        for r in range(3, 0, -1):
+            item['title'] = self.name
+            item['date'] = (response.xpath(f'/html/body/div/table/tbody/tr[{r}]/td[1]/text()')
+                            .get().replace('\t', '').replace('\n', ''))
+            item['value'] = (response.xpath(f'/html/body/div/table/tbody/tr[{r}]/td[2]/text()')
+                             .get().replace(',', '').replace('\t', '').replace('\n', ''))
+            self.logger.info(f"date : {item['date']}, value : {item['value']}")
+            yield item
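The daily spiders above (silver, sp500, usdkrw, wti, and the aud/chf/gold/kospi/kosdaq ones whose hunks are not shown here) repeat one skeleton: read the top rows of a daily-quote table and strip commas, tabs, and newlines. A hypothetical helper capturing that shared cleanup step; the name clean_cell is not part of the package, which inlines the .replace() chains in each parse() instead:

from typing import Optional


def clean_cell(text: Optional[str]) -> Optional[str]:
    # Hypothetical helper: remove the thousands separators and layout whitespace
    # Naver embeds in its daily-quote table cells.
    if text is None:
        return None
    return text.replace(',', '').replace('\t', '').replace('\n', '').strip()


# e.g. clean_cell('\n\t\t1,912.50\n') -> '1912.50'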
scraper2_hj3415/miscrapy/run.py
@@ -0,0 +1,156 @@
+import os
+import time
+
+from scrapy.crawler import CrawlerProcess
+from scrapy.utils.project import get_project_settings
+from multiprocessing import Process
+
+from .mi import calc
+from db_hj3415 import mongo2, dbpath
+import datetime
+
+import logging
+
+logger = logging.getLogger(__name__)
+formatter = logging.Formatter('%(levelname)s: [%(name)s] %(message)s')
+ch = logging.StreamHandler()
+ch.setFormatter(formatter)
+logger.addHandler(ch)
+logger.setLevel(logging.ERROR)
+
+
+def chcwd(func):
+    """
+    Decorator that temporarily switches the current working directory, because scrapy must always run from inside its project.
+    :param func:
+    :return:
+    """
+
+    def wrapper(*args, **kwargs):
+        before_cwd = os.getcwd()
+        logger.info(f'current path : {before_cwd}')
+        after_cwd = os.path.dirname(os.path.realpath(__file__))
+        logger.info(f'change path to {after_cwd}')
+        os.chdir(after_cwd)
+        func(*args, **kwargs)
+        logger.info(f'restore path to {before_cwd}')
+        os.chdir(before_cwd)
+
+    return wrapper
+
+
+def _use_single(spider):
+    # reference from https://docs.scrapy.org/en/latest/topics/practices.html (running spiders from a script)
+    process = CrawlerProcess(get_project_settings())
+    process.crawl(spider)
+    process.start()
+
+
+@chcwd
+def mi():
+    spider_list = ('aud', 'chf', 'gbond3y', 'gold', 'kosdaq', 'kospi', 'silver', 'sp500', 'usdidx', 'usdkrw', 'wti',)
+    print('*' * 25, f"Scrape multiprocess mi", '*' * 25)
+    logger.info(spider_list)
+
+    start_time = time.time()
+    ths = []
+    error = False
+    for spider in spider_list:
+        ths.append(Process(target=_use_single, args=(spider,)))
+    for i in range(len(ths)):
+        ths[i].start()
+    for i in range(len(ths)):
+        ths[i].join()
+        if ths[i].exitcode != 0:
+            error = True
+
+    # Use the calc module to compute and save avg_per and yield_gap.
+    print('*' * 25, f"Calculate and save avgper and yieldgap", '*' * 25)
+    client = mongo2.connect_mongo(dbpath.load())
+    mi_mongo2 = mongo2.MI(client, 'avgper')
+    # mi_sqlite = sqlite.MI()
+    today_str = datetime.datetime.today().strftime('%Y.%m.%d')
+
+    avgper = calc.avg_per()
+    avgper_dict = {'date': today_str, 'value': str(avgper)}
+    logger.info(avgper_dict)
+    mi_mongo2.save(mi_dict=avgper_dict, index='avgper')
+    print(f'\tSave to mongo... date : {today_str} / title : avgper / value : {avgper}')
+    #mi_sqlite.save(mi_dict=avgper_dict, index='avgper')
+    #print(f'\tSave to sqlite... date : {today_str} / title : avgper / value : {avgper}')
+
+    yieldgap = calc.yield_gap(client, avgper)
+    yieldgap_dict = {'date': today_str, 'value': str(yieldgap)}
+    logger.info(yieldgap_dict)
+    mi_mongo2.save(mi_dict=yieldgap_dict, index='yieldgap')
+    print(f'\tSave to mongo... date : {today_str} / title : yieldgap / value : {yieldgap}')
+    #mi_sqlite.save(mi_dict=yieldgap_dict, index='yieldgap')
+    #print(f'\tSave to sqlite... date : {today_str} / title : yieldgap / value : {yieldgap}')
+
+    print(f'Total spent time : {round(time.time() - start_time, 2)} sec')
+    print('done.')
+    return error
+
+
+@chcwd
+def _mi_test(spider: str):
+    _use_single(spider=spider)
+
+
+@chcwd
+def mihistory(year: int):
+    process = CrawlerProcess(get_project_settings())
+    process.crawl('mihistory', year=year)
+    process.start()
+
+
+"""Compute avgper and yieldgap
+"""
+import math
+from db_hj3415 import mongo2, dbpath
+from eval_hj3415 import eval
+from util_hj3415 import utils
+
+import logging
+logger = logging.getLogger(__name__)
+formatter = logging.Formatter('%(levelname)s: [%(name)s] %(message)s')
+ch = logging.StreamHandler()
+ch.setFormatter(formatter)
+logger.addHandler(ch)
+logger.setLevel(logging.WARNING)
+
+
+def avg_per() -> float:
+    # Compute the average PER as a market-cap-weighted harmonic mean and store it in the mi db
+    per_r_cap_all = []
+    cap_all = []
+    eval_list = eval.make_today_eval_df(dbpath.load()).to_dict('records')
+    for data in eval_list:
+        # eval data: {'code': '111870', '종목명': 'KH 일렉트론', '주가': 1070, 'PER': -2.28, 'PBR': 0.96,
+        # '시가총액': 103300000000, 'RED': -11055.0, '주주수익률': -7.13, '이익지표': -0.30426, 'ROIC': -40.31,
+        # 'ROE': 0.0, 'PFCF': -7.7, 'PCR': nan}
+        logger.debug(f'eval data: {data}')
+        if math.isnan(data['PER']) or data['PER'] == 0:
+            continue
+        if math.isnan(data['시가총액']):
+            continue
+        cap_all.append(data['시가총액'])
+        per_r_cap_all.append((1 / data['PER']) * data['시가총액'])
+    logger.debug(f'Count cap_all :{len(cap_all)}')
+    logger.debug(f'Count per_r_cap_all : {len(per_r_cap_all)}')
+    try:
+        return round(sum(cap_all) / sum(per_r_cap_all), 2)
+    except ZeroDivisionError:
+        return float('nan')
+
+
+def yield_gap(client, avg_per: float) -> float:
+    # Yield gap used by the django app, stored in the mi db
+    date, gbond3y = mongo2.MI(client, index='gbond3y').get_recent()
+    if math.isnan(avg_per) or avg_per == 0:
+        return float('nan')
+    else:
+        yield_share = (1 / avg_per) * 100
+        yield_gap = round(yield_share - utils.to_float(gbond3y), 2)
+        logger.debug(f"Date - {date}, gbond3y - {gbond3y}, yield_gap - {yield_gap}")
+        return yield_gap
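Two notes on the run.py hunk above. mi() launches every spider in its own multiprocessing.Process, presumably because a Scrapy CrawlerProcess cannot be started twice in one interpreter (Twisted's reactor is not restartable), so running the eleven crawls sequentially in-process would fail. The calc section computes avg_per as a market-cap-weighted harmonic mean of PER (sum of caps divided by sum of cap/PER), and yield_gap as the spread between the earnings yield implied by that PER, 100/avg_per, and the latest 3-year treasury rate. A worked example with made-up numbers, following the formulas above:

# Hypothetical figures, illustrative only: (market cap in KRW, PER)
stocks = [(200_000_000_000, 10.0), (100_000_000_000, 20.0)]

cap_all = [cap for cap, per in stocks]
per_r_cap_all = [(1 / per) * cap for cap, per in stocks]

avg_per = round(sum(cap_all) / sum(per_r_cap_all), 2)  # 300e9 / 25e9 -> 12.0
yield_share = (1 / avg_per) * 100                      # implied earnings yield -> ~8.33 %
gbond3y = 3.5                                          # made-up 3-year treasury rate (%)
yield_gap = round(yield_share - gbond3y, 2)            # -> 4.83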
scraper2_hj3415/miscrapy/scrapy.cfg
@@ -0,0 +1,11 @@
+# Automatically created by: scrapy startproject
+#
+# For more information about the [deploy] section see:
+# https://scrapyd.readthedocs.io/en/latest/deploy.html
+
+[settings]
+default = mi.settings
+
+[deploy]
+#url = http://localhost:6800/
+project = mi
scraper2_hj3415/nfscrapy/nfs/items.py
@@ -32,3 +32,27 @@ class C101items(scrapy.Item):
     intro1 = scrapy.Field()
     intro2 = scrapy.Field()
     intro3 = scrapy.Field()
+
+
+class C103items(scrapy.Item):
+    코드 = scrapy.Field()
+    title = scrapy.Field()
+    df = scrapy.Field()
+
+
+class C104items(scrapy.Item):
+    코드 = scrapy.Field()
+    title = scrapy.Field()
+    df = scrapy.Field()
+
+
+class C106items(scrapy.Item):
+    코드 = scrapy.Field()
+    title = scrapy.Field()  # y or q
+    df = scrapy.Field()
+
+"""
+class C108items(scrapy.Item):
+    코드 = scrapy.Field()
+    df = scrapy.Field()
+"""
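The new C103/C104/C106 items each carry a stock code (코드), a title, and a DataFrame field named df. A sketch of how a spider might fill one, assuming the nfs project package is imported the same way the mi project imports its items; the values are made up, and the real population happens in c103.py/c104.py/c106.py, whose hunks are not shown here:

import pandas as pd
from nfs import items  # assumed import path, mirroring `from mi import items` above

item = items.C103items()
item['코드'] = '005930'  # illustrative stock code
item['title'] = 'q'      # illustrative; the C106items comment above notes 'y or q'
item['df'] = pd.DataFrame({'date': ['2021/12'], 'value': [1.0]})  # made-up frame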