scraper2-hj3415 0.0.2__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. scraper2_hj3415/cli.py +40 -3
  2. scraper2_hj3415/miscrapy/mi/__init__.py +0 -0
  3. scraper2_hj3415/miscrapy/mi/items.py +7 -0
  4. scraper2_hj3415/miscrapy/mi/middlewares.py +103 -0
  5. scraper2_hj3415/miscrapy/mi/pipelines.py +39 -0
  6. scraper2_hj3415/miscrapy/mi/settings.py +103 -0
  7. scraper2_hj3415/miscrapy/mi/spiders/__init__.py +4 -0
  8. scraper2_hj3415/miscrapy/mi/spiders/aud.py +23 -0
  9. scraper2_hj3415/miscrapy/mi/spiders/chf.py +25 -0
  10. scraper2_hj3415/miscrapy/mi/spiders/gbond3y.py +24 -0
  11. scraper2_hj3415/miscrapy/mi/spiders/gold.py +25 -0
  12. scraper2_hj3415/miscrapy/mi/spiders/kosdaq.py +23 -0
  13. scraper2_hj3415/miscrapy/mi/spiders/kospi.py +23 -0
  14. scraper2_hj3415/miscrapy/mi/spiders/mihistory.py +333 -0
  15. scraper2_hj3415/miscrapy/mi/spiders/silver.py +25 -0
  16. scraper2_hj3415/miscrapy/mi/spiders/sp500.py +24 -0
  17. scraper2_hj3415/miscrapy/mi/spiders/usdidx.py +30 -0
  18. scraper2_hj3415/miscrapy/mi/spiders/usdkrw.py +24 -0
  19. scraper2_hj3415/miscrapy/mi/spiders/wti.py +25 -0
  20. scraper2_hj3415/miscrapy/run.py +156 -0
  21. scraper2_hj3415/miscrapy/scrapy.cfg +11 -0
  22. scraper2_hj3415/nfscrapy/nfs/items.py +24 -0
  23. scraper2_hj3415/nfscrapy/nfs/pipelines.py +94 -16
  24. scraper2_hj3415/nfscrapy/nfs/settings.py +2 -1
  25. scraper2_hj3415/nfscrapy/nfs/spiders/_c108.py +95 -0
  26. scraper2_hj3415/nfscrapy/nfs/spiders/c101.py +3 -11
  27. scraper2_hj3415/nfscrapy/nfs/spiders/c103.py +174 -0
  28. scraper2_hj3415/nfscrapy/nfs/spiders/c104.py +228 -0
  29. scraper2_hj3415/nfscrapy/nfs/spiders/c106.py +92 -0
  30. scraper2_hj3415/nfscrapy/nfs/spiders/common.py +60 -0
  31. scraper2_hj3415/nfscrapy/run.py +76 -26
  32. {scraper2_hj3415-0.0.2.dist-info → scraper2_hj3415-0.1.0.dist-info}/METADATA +1 -1
  33. scraper2_hj3415-0.1.0.dist-info/RECORD +42 -0
  34. scraper2_hj3415-0.1.0.dist-info/entry_points.txt +2 -0
  35. scraper2_hj3415-0.0.2.dist-info/RECORD +0 -17
  36. scraper2_hj3415-0.0.2.dist-info/entry_points.txt +0 -2
  37. {scraper2_hj3415-0.0.2.dist-info → scraper2_hj3415-0.1.0.dist-info}/WHEEL +0 -0
  38. {scraper2_hj3415-0.0.2.dist-info → scraper2_hj3415-0.1.0.dist-info}/licenses/LICENSE +0 -0
scraper2_hj3415/miscrapy/mi/spiders/mihistory.py
@@ -0,0 +1,333 @@
+ import time
+ import scrapy
+ from util_hj3415 import utils
+ from scrapy.selector import Selector
+ from selenium.common.exceptions import NoSuchElementException
+ from mi import items
+
+ # cmd usage : scrapy crawl mihistory -a year=1
+
+
+ WAIT = 1
+
+
+ class MIHistory(scrapy.Spider):
+     name = 'mihistory'
+     allowed_domains = ['finance.naver.com']
+
+     def __init__(self, year=1):
+         self.year = int(year)
+         self.driver = utils.get_driver()
+         if self.driver is None:
+             raise
+         # Pages going back roughly one year: kospi, kosdaq -> 42, gbond3y -> 38, s&p -> 27, usdkrw -> 26, wti -> 38, gold -> 38, audchf -> 46
+         self.last_page_kospi_kosdaq = 42 * self.year
+         self.last_page_3bond3y = 38 * self.year
+         self.last_page_sp500 = 27 * self.year
+         self.last_page_usdkrw = 26 * self.year
+         self.last_page_wti = 38 * self.year
+         self.last_page_gold = 38 * self.year
+         self.last_page_silver = 38 * self.year
+         self.last_page_audchf = 46 * self.year
+         self.item_list = []
+         self.aud_dict = {}
+         self.audchf_dict = {}
+
+     def start_requests(self):
+         # reference from https://docs.scrapy.org/en/latest/topics/request-response.html
+         print(f'Parsing Market Index history...{self.year} year..')
+         yield scrapy.Request(
+             url=f'https://finance.naver.com/sise/sise_index_day.nhn?code=KOSPI&page={self.last_page_kospi_kosdaq}',
+             callback=self.parse_kospi,
+             cb_kwargs=dict(page=self.last_page_kospi_kosdaq),
+         )
+
+     def parse_kospi(self, response, page):
+         print(f"Parsing ...kospi {page} page", flush=True)
+         item = items.MIitems()
+         # Scrape KOSPI
+         time.sleep(WAIT)
+         self.logger.info(response.url)
+         for i in [12, 11, 10, 5, 4, 3]:
+             item['title'] = 'kospi'
+             item['date'] = response.xpath(f'/html/body/div/table[1]/tr[{i}]/td[1]/text()').get()
+             item['value'] = response.xpath(f'/html/body/div/table[1]/tr[{i}]/td[2]/text()').get().replace(',', '')
+             self.logger.info(f"date : {item['date']}, value : {item['value']}")
+             yield item
+         if page > 1:
+             yield scrapy.Request(
+                 url=f'https://finance.naver.com/sise/sise_index_day.nhn?code=KOSPI&page={page - 1}',
+                 callback=self.parse_kospi,
+                 cb_kwargs=dict(page=page - 1),
+             )
+         else:
+             yield scrapy.Request(
+                 url=f'https://finance.naver.com/sise/sise_index_day.nhn?code=KOSDAQ&page={self.last_page_kospi_kosdaq}',
+                 callback=self.parse_kosdaq,
+                 cb_kwargs=dict(page=self.last_page_kospi_kosdaq),
+             )
+
+     def parse_kosdaq(self, response, page):
+         print(f"Parsing ...kosdaq {page} page", flush=True)
+         item = items.MIitems()
+         # Scrape KOSDAQ
+         time.sleep(WAIT)
+         self.logger.info(response.url)
+         for i in [12, 11, 10, 5, 4, 3]:
+             item['title'] = 'kosdaq'
+             item['date'] = response.xpath(f'/html/body/div/table[1]/tr[{i}]/td[1]/text()').get()
+             item['value'] = response.xpath(f'/html/body/div/table[1]/tr[{i}]/td[2]/text()').get().replace(',', '')
+             self.logger.info(f"date : {item['date']}, value : {item['value']}")
+             yield item
+         if page > 1:
+             yield scrapy.Request(
+                 url=f'https://finance.naver.com/sise/sise_index_day.nhn?code=KOSDAQ&page={page - 1}',
+                 callback=self.parse_kosdaq,
+                 cb_kwargs=dict(page=page - 1),
+             )
+         else:
+             yield scrapy.Request(
+                 url=f'https://finance.naver.com/marketindex/interestDailyQuote.nhn?marketindexCd=IRR_GOVT03Y&page={self.last_page_3bond3y}',
+                 callback=self.parse_gbond3y,
+                 cb_kwargs=dict(page=self.last_page_3bond3y),
+             )
+
+     def parse_gbond3y(self, response, page):
+         print(f"Parsing ...gbond3y {page} page", flush=True)
+         item = items.MIitems()
+         # Scrape the 3-year Korean treasury bond yield
+         time.sleep(WAIT)
+         self.logger.info(response.url)
+         for i in range(7, 0, -1):
+             item['title'] = 'gbond3y'
+             item['date'] = (response.css(f'body > div > table > tbody > tr:nth-child({i}) > td.date::text')
+                             .extract()[0].replace('\n', '').replace('\t', ''))
+             item['value'] = (response.css(f'body > div > table > tbody > tr:nth-child({i}) > td:nth-child(2)::text')
+                              .extract()[0])
+             self.logger.info(f"date : {item['date']}, value : {item['value']}")
+             yield item
+         if page > 1:
+             yield scrapy.Request(
+                 url=f'https://finance.naver.com/marketindex/interestDailyQuote.nhn?marketindexCd=IRR_GOVT03Y&page={page - 1}',
+                 callback=self.parse_gbond3y,
+                 cb_kwargs=dict(page=page - 1),
+             )
+         else:
+             yield scrapy.Request(
+                 url=f'https://finance.naver.com/marketindex/worldDailyQuote.nhn?fdtc=4&marketindexCd=FX_USDAUD',
+                 callback=self.parse_aud,
+                 cb_kwargs=dict(page=self.last_page_audchf),
+             )
+
+     def parse_aud(self, response, page):
+         print(f"Parsing ...aud {page} page", flush=True)
+         item = items.MIitems()
+         # Scrape AUD
+         time.sleep(WAIT)
+         self.logger.info(response.url)
+         for i in range(7, 0, -1):
+             item['title'] = 'aud'
+             item['date'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[1]/text()')
+                             .get().replace('\t', '').replace('\n', ''))
+             item['value'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[2]/text()')
+                              .get().replace(',', '').replace('\t', '').replace('\n', ''))
+             self.logger.info(f"date : {item['date']}, value : {item['value']}")
+             yield item
+         if page > 1:
+             yield scrapy.Request(
+                 url=f'https://finance.naver.com/marketindex/worldDailyQuote.nhn?fdtc=4&marketindexCd=FX_USDAUD&page={page - 1}',
+                 callback=self.parse_aud,
+                 cb_kwargs=dict(page=page - 1),
+             )
+         else:
+             yield scrapy.Request(
+                 url=f'https://finance.naver.com/marketindex/worldDailyQuote.nhn?fdtc=4&marketindexCd=FX_USDCHF',
+                 callback=self.parse_chf,
+                 cb_kwargs=dict(page=self.last_page_audchf),
+             )
+
+     def parse_chf(self, response, page):
+         print(f"Parsing ...chf {page} page", flush=True)
+         item = items.MIitems()
+         # Scrape CHF
+         time.sleep(WAIT)
+         self.logger.info(response.url)
+         for i in range(7, 0, -1):
+             item['title'] = 'chf'
+             item['date'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[1]/text()')
+                             .get().replace('\t', '').replace('\n', ''))
+             item['value'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[2]/text()')
+                              .get().replace(',', '').replace('\t', '').replace('\n', ''))
+             self.logger.info(f"date : {item['date']}, value : {item['value']}")
+             yield item
+         if page > 1:
+             yield scrapy.Request(
+                 url=f'https://finance.naver.com/marketindex/worldDailyQuote.nhn?fdtc=4&marketindexCd=FX_USDCHF&page={page - 1}',
+                 callback=self.parse_chf,
+                 cb_kwargs=dict(page=page - 1),
+             )
+         else:
+             yield scrapy.Request(
+                 url=f'https://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_USDKRW&page={self.last_page_usdkrw}',
+                 callback=self.parse_usdkrw,
+                 cb_kwargs=dict(page=self.last_page_usdkrw),
+             )
+
+     def parse_usdkrw(self, response, page):
+         print(f"Parsing ...usdkrw {page} page", flush=True)
+         item = items.MIitems()
+         # Scrape the USD/KRW exchange rate
+         time.sleep(WAIT)
+         self.logger.info(response.url)
+         for i in range(10, 0, -1):
+             item['title'] = 'usdkrw'
+             item['date'] = response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[1]/text()').get()
+             item['value'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[2]/text()')
+                              .get().replace(',', ''))
+             self.logger.info(f"date : {item['date']}, value : {item['value']}")
+             yield item
+         if page > 1:
+             yield scrapy.Request(
+                 url=f'https://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_USDKRW&page={page - 1}',
+                 callback=self.parse_usdkrw,
+                 cb_kwargs=dict(page=page - 1),
+             )
+         else:
+             yield scrapy.Request(
+                 url=f'https://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=OIL_CL&fdtc=2&page={self.last_page_wti}',
+                 callback=self.parse_wti,
+                 cb_kwargs=dict(page=self.last_page_wti),
+             )
+
+     def parse_wti(self, response, page):
+         print(f"Parsing ...wti {page} page", flush=True)
+         item = items.MIitems()
+         # Scrape WTI crude oil
+         time.sleep(WAIT)
+         self.logger.info(response.url)
+         for i in range(7, 0, -1):
+             item['title'] = 'wti'
+             item['date'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[1]/text()')
+                             .get().replace('\t', '').replace('\n', ''))
+             item['value'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[2]/text()')
+                              .get().replace(',', '').replace('\t', '').replace('\n', ''))
+             self.logger.info(f"date : {item['date']}, value : {item['value']}")
+             yield item
+         if page > 1:
+             yield scrapy.Request(
+                 url=f'https://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=OIL_CL&fdtc=2&page={page - 1}',
+                 callback=self.parse_wti,
+                 cb_kwargs=dict(page=page - 1),
+             )
+         else:
+             yield scrapy.Request(
+                 url=f'https://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=CMDT_GC&fdtc=2&page={self.last_page_gold}',
+                 callback=self.parse_gold,
+                 cb_kwargs=dict(page=self.last_page_gold),
+             )
+
+     def parse_gold(self, response, page):
+         print(f"Parsing ...gold {page} page", flush=True)
+         item = items.MIitems()
+         # Scrape gold
+         time.sleep(WAIT)
+         self.logger.info(response.url)
+         for i in range(7, 0, -1):
+             item['title'] = 'gold'
+             item['date'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[1]/text()')
+                             .get().replace('\t', '').replace('\n', ''))
+             item['value'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[2]/text()')
+                              .get().replace(',', '').replace('\t', '').replace('\n', ''))
+             self.logger.info(f"date : {item['date']}, value : {item['value']}")
+             yield item
+         if page > 1:
+             yield scrapy.Request(
+                 url=f'https://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=CMDT_GC&fdtc=2&page={page - 1}',
+                 callback=self.parse_gold,
+                 cb_kwargs=dict(page=page - 1),
+             )
+         else:
+             yield scrapy.Request(
+                 url=f'https://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=CMDT_SI&fdtc=2&page={self.last_page_silver}',
+                 callback=self.parse_silver,
+                 cb_kwargs=dict(page=self.last_page_silver),
+             )
+
+     def parse_silver(self, response, page):
+         print(f"Parsing ...silver {page} page", flush=True)
+         item = items.MIitems()
+         # Scrape silver
+         time.sleep(WAIT)
+         self.logger.info(response.url)
+         for i in range(7, 0, -1):
+             item['title'] = 'silver'
+             item['date'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[1]/text()')
+                             .get().replace('\t', '').replace('\n', ''))
+             item['value'] = (response.xpath(f'/html/body/div/table/tbody/tr[{i}]/td[2]/text()')
+                              .get().replace(',', '').replace('\t', '').replace('\n', ''))
+             self.logger.info(f"date : {item['date']}, value : {item['value']}")
+             yield item
+         if page > 1:
+             yield scrapy.Request(
+                 url=f'https://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=CMDT_SI&fdtc=2&page={page - 1}',
+                 callback=self.parse_silver,
+                 cb_kwargs=dict(page=page - 1),
+             )
+         else:
+             yield scrapy.Request(
+                 url=f'https://finance.naver.com/world/sise.nhn?symbol=SPI@SPX',
+                 callback=self.parse_sp500,
+                 cb_kwargs=dict(page=self.last_page_sp500),
+             )
+
+     def parse_sp500(self, response, page):
+         print(f"Parsing ...sp500 {page} page", flush=True)
+         item = items.MIitems()
+         # Scrape the S&P 500
+         time.sleep(WAIT)
+         self.logger.info(response.url)
+         self.driver.get(response.url)
+         time.sleep(WAIT)
+         next1 = '//*[@id="dayPaging"]/a[11]'  # 'Next' button on the first page
+         next2 = '//*[@id="dayPaging"]/a[12]'  # 'Next' button on pages after the first
+         self.driver.find_element_by_xpath(next1).click()
+         time.sleep(WAIT)
+         '''
+         self.driver.find_element_by_xpath(next2).click()
+         time.sleep(1)
+         self.driver.find_element_by_xpath(next2).click()
+         time.sleep(1)
+         self.driver.find_element_by_xpath(next2).click()
+         time.sleep(1)
+         '''
+         for j in range(page, 0, -1):
+             if j % 10 == 0:
+                 prev = '//*[@id="dayPaging"]/a[1]'
+                 self.driver.find_element_by_xpath(prev).click()
+                 self.logger.info('click prev...')
+                 time.sleep(1)
+             link = f'//*[@id="dayLink{j}"]'
+             while True:
+                 try:
+                     self.driver.find_element_by_xpath(link).click()
+                     self.logger.info(f'click {j} button..')
+                     break
+                 except NoSuchElementException as e:
+                     self.logger.error(f'Error : {e}')
+                     self.driver.find_element_by_xpath(next2).click()
+                     time.sleep(1)
+             time.sleep(1)
+             sel = Selector(text=self.driver.page_source)
+
+             for i in range(10, 0, -1):
+                 item['title'] = 'sp500'
+                 item['date'] = sel.xpath(f'//*[@id="dayTable"]/tbody/tr[{i}]/td[1]/text()').get()
+                 item['value'] = (sel.xpath(f'//*[@id="dayTable"]/tbody/tr[{i}]/td[2]/span/text()')
+                                  .get().replace(',', ''))
+                 self.logger.info(f"date : {item['date']}, value : {item['value']}")
+                 yield item
+
+     def __del__(self):
+         if self.driver is not None:
+             print('Retrieve chrome driver...')
+             self.driver.quit()
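The mihistory spider above pages backwards through each Naver index by re-yielding a request to the same callback with a decremented page counter in cb_kwargs, then chains to the next index once it reaches page 1. A minimal, self-contained sketch of that countdown-pagination pattern follows; the example.com URL and the field names are placeholders, not part of this package:

    import scrapy


    class PagedExampleSpider(scrapy.Spider):
        # Hypothetical spider illustrating MIHistory's pagination style.
        name = 'paged_example'
        last_page = 5  # oldest page; the spider walks forward to page 1

        def start_requests(self):
            yield scrapy.Request(
                url=f'https://example.com/quotes?page={self.last_page}',
                callback=self.parse_page,
                cb_kwargs=dict(page=self.last_page),
            )

        def parse_page(self, response, page):
            # Emit one record per table row on the current page.
            for row in response.xpath('//table//tr'):
                yield {'page': page, 'date': row.xpath('td[1]/text()').get()}
            # Keep requesting the next newer page until page 1 has been parsed.
            if page > 1:
                yield scrapy.Request(
                    url=f'https://example.com/quotes?page={page - 1}',
                    callback=self.parse_page,
                    cb_kwargs=dict(page=page - 1),
                )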
scraper2_hj3415/miscrapy/mi/spiders/silver.py
@@ -0,0 +1,25 @@
+ import scrapy
+ from mi import items
+
+ # cmd usage : scrapy crawl silver
+
+
+ class SilverSpider(scrapy.Spider):
+     name = 'silver'
+     allowed_domains = ['finance.naver.com']
+     start_urls = ['https://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=CMDT_SI&fdtc=2']
+
+     def parse(self, response):
+         print(f"Parsing ...'{self.name}' page", flush=True)
+         item = items.MIitems()
+         # Scrape silver
+         self.logger.info(response.url)
+         # Scrape the most recent 3 days of data.
+         for r in range(3, 0, -1):
+             item['title'] = self.name
+             item['date'] = (response.xpath(f'/html/body/div/table/tbody/tr[{r}]/td[1]/text()')
+                             .get().replace('\t', '').replace('\n', ''))
+             item['value'] = (response.xpath(f'/html/body/div/table/tbody/tr[{r}]/td[2]/text()')
+                              .get().replace(',', '').replace('\t', '').replace('\n', ''))
+             self.logger.info(f"date : {item['date']}, value : {item['value']}")
+             yield item
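Each of the daily-quote spiders repeats the same .get().replace('\t', '').replace('\n', '').replace(',', '') chain on every cell it reads. A small shared helper would keep the spiders in sync if that cleanup ever changes; the clean_cell function below is only a sketch of that idea, not something shipped in this release:

    from typing import Optional


    def clean_cell(text: Optional[str]) -> str:
        # Strip the tabs, newlines and thousands separators that the
        # daily-quote tables embed in each <td> text node.
        if text is None:
            return ''
        return text.replace('\t', '').replace('\n', '').replace(',', '').strip()


    # Hypothetical usage inside a spider's parse():
    # item['date'] = clean_cell(response.xpath(f'/html/body/div/table/tbody/tr[{r}]/td[1]/text()').get())
    # item['value'] = clean_cell(response.xpath(f'/html/body/div/table/tbody/tr[{r}]/td[2]/text()').get())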
scraper2_hj3415/miscrapy/mi/spiders/sp500.py
@@ -0,0 +1,24 @@
+ import scrapy
+ from mi import items
+
+ # cmd usage : scrapy crawl sp500
+
+
+ class Sp500Spider(scrapy.Spider):
+     name = 'sp500'
+     allowed_domains = ['finance.naver.com']
+     start_urls = ['https://finance.naver.com/world/sise.nhn?symbol=SPI@SPX']
+
+     def parse(self, response):
+         print(f"Parsing ...'{self.name}' page", flush=True)
+         item = items.MIitems()
+         # Scrape the S&P 500
+         self.logger.info(response.url)
+         # Scrape the most recent 3 days of data.
+         for r in range(3, 0, -1):
+             item['title'] = self.name
+             item['date'] = response.xpath(f'//*[@id="dayTable"]/tbody/tr[{r}]/td[1]/text()').get()
+             item['value'] = (response.xpath(f'//*[@id="dayTable"]/tbody/tr[{r}]/td[2]/span/text()')
+                              .get().replace(',', ''))
+             self.logger.info(f"date : {item['date']}, value : {item['value']}")
+             yield item
scraper2_hj3415/miscrapy/mi/spiders/usdidx.py
@@ -0,0 +1,30 @@
+ import scrapy
+ from mi import items
+
+ # cmd usage : scrapy crawl usdidx
+
+
+ class UsdidxSpider(scrapy.Spider):
+     name = 'usdidx'
+     allowed_domains = ['finance.naver.com']
+     start_urls = ['https://finance.naver.com/marketindex/worldExchangeDetail.nhn?marketindexCd=FX_USDX']
+
+     def parse(self, response):
+         print(f"Parsing ...'{self.name}' page", flush=True)
+         item = items.MIitems()
+         # Scrape the dollar index
+         self.logger.info(response.url)
+
+         # Scrape the most recent data.
+         # date - //*[@id="content"]/div[1]/div[2]/span[1]
+         # value - //*[@id="content"]/div[1]/div[1]/p[1]/em
+         value = []
+         for span in response.xpath(f'//*[@id="content"]/div[1]/div[1]/p[1]/em/span'):
+             value.append(span.xpath('text()').get())
+
+         item['title'] = self.name
+         item['date'] = response.xpath('//*[@id="content"]/div[1]/div[2]/span[1]/text()').get()
+         item['value'] = ''.join(value)
+
+         self.logger.info(f"date : {item['date']}, value : {item['value']}")
+         yield item
scraper2_hj3415/miscrapy/mi/spiders/usdkrw.py
@@ -0,0 +1,24 @@
+ import scrapy
+ from mi import items
+
+ # cmd usage : scrapy crawl usdkrw
+
+
+ class UsdkrwSpider(scrapy.Spider):
+     name = 'usdkrw'
+     allowed_domains = ['finance.naver.com']
+     start_urls = ['https://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_USDKRW']
+
+     def parse(self, response):
+         print(f"Parsing ...'{self.name}' page", flush=True)
+         item = items.MIitems()
+         # Scrape the USD/KRW exchange rate
+         self.logger.info(response.url)
+         # Scrape the most recent 3 days of data.
+         for r in range(3, 0, -1):
+             item['title'] = self.name
+             item['date'] = response.xpath(f'/html/body/div/table/tbody/tr[{r}]/td[1]/text()').get()
+             item['value'] = (response.xpath(f'/html/body/div/table/tbody/tr[{r}]/td[2]/text()')
+                              .get().replace(',', ''))
+             self.logger.info(f"date : {item['date']}, value : {item['value']}")
+             yield item
scraper2_hj3415/miscrapy/mi/spiders/wti.py
@@ -0,0 +1,25 @@
+ import scrapy
+ from mi import items
+
+ # cmd usage : scrapy crawl wti
+
+
+ class WtiSpider(scrapy.Spider):
+     name = 'wti'
+     allowed_domains = ['finance.naver.com']
+     start_urls = ['https://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=OIL_CL&fdtc=2']
+
+     def parse(self, response):
+         print(f"Parsing ...'{self.name}' page", flush=True)
+         item = items.MIitems()
+         # Scrape WTI crude oil
+         self.logger.info(response.url)
+         # Scrape the most recent 3 days of data.
+         for r in range(3, 0, -1):
+             item['title'] = self.name
+             item['date'] = (response.xpath(f'/html/body/div/table/tbody/tr[{r}]/td[1]/text()')
+                             .get().replace('\t', '').replace('\n', ''))
+             item['value'] = (response.xpath(f'/html/body/div/table/tbody/tr[{r}]/td[2]/text()')
+                              .get().replace(',', '').replace('\t', '').replace('\n', ''))
+             self.logger.info(f"date : {item['date']}, value : {item['value']}")
+             yield item
scraper2_hj3415/miscrapy/run.py
@@ -0,0 +1,156 @@
+ import os
+ import time
+
+ from scrapy.crawler import CrawlerProcess
+ from scrapy.utils.project import get_project_settings
+ from multiprocessing import Process
+
+ from .mi import calc
+ from db_hj3415 import mongo2, dbpath
+ import datetime
+
+ import logging
+
+ logger = logging.getLogger(__name__)
+ formatter = logging.Formatter('%(levelname)s: [%(name)s] %(message)s')
+ ch = logging.StreamHandler()
+ ch.setFormatter(formatter)
+ logger.addHandler(ch)
+ logger.setLevel(logging.ERROR)
+
+
+ def chcwd(func):
+     """
+     Decorator that temporarily changes the current working directory, because scrapy must always be run from inside its project directory.
+     :param func:
+     :return:
+     """
+
+     def wrapper(*args, **kwargs):
+         before_cwd = os.getcwd()
+         logger.info(f'current path : {before_cwd}')
+         after_cwd = os.path.dirname(os.path.realpath(__file__))
+         logger.info(f'change path to {after_cwd}')
+         os.chdir(after_cwd)
+         func(*args, **kwargs)
+         logger.info(f'restore path to {before_cwd}')
+         os.chdir(before_cwd)
+
+     return wrapper
+
+
+ def _use_single(spider):
+     # reference from https://docs.scrapy.org/en/latest/topics/practices.html (running spiders from a script)
+     process = CrawlerProcess(get_project_settings())
+     process.crawl(spider)
+     process.start()
+
+
+ @chcwd
+ def mi():
+     spider_list = ('aud', 'chf', 'gbond3y', 'gold', 'kosdaq', 'kospi', 'silver', 'sp500', 'usdidx', 'usdkrw', 'wti',)
+     print('*' * 25, f"Scrape multiprocess mi", '*' * 25)
+     logger.info(spider_list)
+
+     start_time = time.time()
+     ths = []
+     error = False
+     for spider in spider_list:
+         ths.append(Process(target=_use_single, args=(spider,)))
+     for i in range(len(ths)):
+         ths[i].start()
+     for i in range(len(ths)):
+         ths[i].join()
+         if ths[i].exitcode != 0:
+             error = True
+
+     # Calculate avg_per and yield_gap with the calc module and save them.
+     print('*' * 25, f"Calculate and save avgper and yieldgap", '*' * 25)
+     client = mongo2.connect_mongo(dbpath.load())
+     mi_mongo2 = mongo2.MI(client, 'avgper')
+     # mi_sqlite = sqlite.MI()
+     today_str = datetime.datetime.today().strftime('%Y.%m.%d')
+
+     avgper = calc.avg_per()
+     avgper_dict = {'date': today_str, 'value': str(avgper)}
+     logger.info(avgper_dict)
+     mi_mongo2.save(mi_dict=avgper_dict, index='avgper')
+     print(f'\tSave to mongo... date : {today_str} / title : avgper / value : {avgper}')
+     #mi_sqlite.save(mi_dict=avgper_dict, index='avgper')
+     #print(f'\tSave to sqlite... date : {today_str} / title : avgper / value : {avgper}')
+
+     yieldgap = calc.yield_gap(client, avgper)
+     yieldgap_dict = {'date': today_str, 'value': str(yieldgap)}
+     logger.info(yieldgap_dict)
+     mi_mongo2.save(mi_dict=yieldgap_dict, index='yieldgap')
+     print(f'\tSave to mongo... date : {today_str} / title : yieldgap / value : {yieldgap}')
+     #mi_sqlite.save(mi_dict=yieldgap_dict, index='yieldgap')
+     #print(f'\tSave to sqlite... date : {today_str} / title : yieldgap / value : {yieldgap}')
+
+     print(f'Total spent time : {round(time.time() - start_time, 2)} sec')
+     print('done.')
+     return error
+
+
+ @chcwd
+ def _mi_test(spider: str):
+     _use_single(spider=spider)
+
+
+ @chcwd
+ def mihistory(year: int):
+     process = CrawlerProcess(get_project_settings())
+     process.crawl('mihistory', year=year)
+     process.start()
+
+
+ """Calculate avgper and yieldgap
+ """
+ import math
+ from db_hj3415 import mongo2, dbpath
+ from eval_hj3415 import eval
+ from util_hj3415 import utils
+
+ import logging
+ logger = logging.getLogger(__name__)
+ formatter = logging.Formatter('%(levelname)s: [%(name)s] %(message)s')
+ ch = logging.StreamHandler()
+ ch.setFormatter(formatter)
+ logger.addHandler(ch)
+ logger.setLevel(logging.WARNING)
+
+
+ def avg_per() -> float:
+     # Compute the average PER as a cap-weighted harmonic mean and save it to the mi db
+     per_r_cap_all = []
+     cap_all = []
+     eval_list = eval.make_today_eval_df(dbpath.load()).to_dict('records')
+     for data in eval_list:
+         # eval data: {'code': '111870', '종목명': 'KH 일렉트론', '주가': 1070, 'PER': -2.28, 'PBR': 0.96,
+         #             '시가총액': 103300000000, 'RED': -11055.0, '주주수익률': -7.13, '이익지표': -0.30426, 'ROIC': -40.31,
+         #             'ROE': 0.0, 'PFCF': -7.7, 'PCR': nan}
+         logger.debug(f'eval data: {data}')
+         if math.isnan(data['PER']) or data['PER'] == 0:
+             continue
+         if math.isnan(data['시가총액']):
+             continue
+         cap_all.append(data['시가총액'])
+         per_r_cap_all.append((1 / data['PER']) * data['시가총액'])
+     logger.debug(f'Count cap_all :{len(cap_all)}')
+     logger.debug(f'Count per_r_cap_all : {len(per_r_cap_all)}')
+     try:
+         return round(sum(cap_all) / sum(per_r_cap_all), 2)
+     except ZeroDivisionError:
+         return float('nan')
+
+
+ def yield_gap(client, avg_per: float) -> float:
+     # Yield gap for use in the Django app, saved to the mi db
+     date, gbond3y = mongo2.MI(client, index='gbond3y').get_recent()
+     if math.isnan(avg_per) or avg_per == 0:
+         return float('nan')
+     else:
+         yield_share = (1 / avg_per) * 100
+         yield_gap = round(yield_share - utils.to_float(gbond3y), 2)
+         logger.debug(f"Date - {date}, gbond3y - {gbond3y}, yield_gap - {yield_gap}")
+         return yield_gap
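avg_per() above is a market-cap-weighted harmonic mean: it returns sum(cap) / sum(cap / PER), so large-cap companies dominate and negative or zero PERs are skipped. A tiny self-contained check with invented numbers (two fictional stocks) shows what the formula produces:

    # Two fictional stocks; the values are made up for illustration only.
    stocks = [
        {'PER': 10.0, '시가총액': 200_000_000_000},  # 200bn cap, 10% earnings yield
        {'PER': 20.0, '시가총액': 100_000_000_000},  # 100bn cap, 5% earnings yield
    ]

    cap_all = [s['시가총액'] for s in stocks]
    per_r_cap_all = [(1 / s['PER']) * s['시가총액'] for s in stocks]

    # sum(cap) / sum(cap / PER) = 300bn / (20bn + 5bn) = 12.0
    avgper = round(sum(cap_all) / sum(per_r_cap_all), 2)
    print(avgper)  # 12.0

    # yield_gap() then subtracts the 3-year treasury yield from the implied
    # earnings yield: with gbond3y at 3.0%, (1 / 12.0) * 100 - 3.0 ≈ 5.33.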
scraper2_hj3415/miscrapy/scrapy.cfg
@@ -0,0 +1,11 @@
+ # Automatically created by: scrapy startproject
+ #
+ # For more information about the [deploy] section see:
+ # https://scrapyd.readthedocs.io/en/latest/deploy.html
+
+ [settings]
+ default = mi.settings
+
+ [deploy]
+ #url = http://localhost:6800/
+ project = mi
scraper2_hj3415/nfscrapy/nfs/items.py
@@ -32,3 +32,27 @@ class C101items(scrapy.Item):
      intro1 = scrapy.Field()
      intro2 = scrapy.Field()
      intro3 = scrapy.Field()
+
+
+ class C103items(scrapy.Item):
+     코드 = scrapy.Field()
+     title = scrapy.Field()
+     df = scrapy.Field()
+
+
+ class C104items(scrapy.Item):
+     코드 = scrapy.Field()
+     title = scrapy.Field()
+     df = scrapy.Field()
+
+
+ class C106items(scrapy.Item):
+     코드 = scrapy.Field()
+     title = scrapy.Field()  # y or q
+     df = scrapy.Field()
+
+ """
+ class C108items(scrapy.Item):
+     코드 = scrapy.Field()
+     df = scrapy.Field()
+ """