scraper2-hj3415 0.2.1__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
Binary file
@@ -100,23 +100,24 @@ def mi_all(mongo_addr: str):
 
  # Use the calc module to compute avg_per and yield_gap and save them.
  print('*' * 25, f"Calculate and save avgper and yieldgap", '*' * 25)
- client = mongo2.connect_mongo(dbpath.load())
- mi_mongo2 = mongo2.MI(client, 'avgper')
- # mi_sqlite = sqlite.MI()
+ client = mongo.connect_mongo("mongodb://192.168.0.173:27017")
+ mi_db = mongo.MI(client, 'avgper')
+
  today_str = datetime.datetime.today().strftime('%Y.%m.%d')
 
- avgper = calc.avg_per()
+ avgper = avg_per()
  avgper_dict = {'date': today_str, 'value': str(avgper)}
  logger.info(avgper_dict)
- mi_mongo2.save(mi_dict=avgper_dict, index='avgper')
+ mi_db.save_dict(mi_dict=avgper_dict)
  print(f'\tSave to mongo... date : {today_str} / title : avgper / value : {avgper}')
  #mi_sqlite.save(mi_dict=avgper_dict, index='avgper')
  #print(f'\tSave to sqlite... date : {today_str} / title : avgper / value : {avgper}')
 
- yieldgap = calc.yield_gap(client, avgper)
+ yieldgap = yield_gap(client, avgper)
  yieldgap_dict = {'date': today_str, 'value': str(yieldgap)}
  logger.info(yieldgap_dict)
- mi_mongo2.save(mi_dict=yieldgap_dict, index='yieldgap')
+ mi_db.index = 'yieldgap'
+ mi_db.save_dict(mi_dict=yieldgap_dict)
  print(f'\tSave to mongo... date : {today_str} / title : yieldgap / value : {yieldgap}')
  #mi_sqlite.save(mi_dict=yieldgap_dict, index='yieldgap')
  #print(f'\tSave to sqlite... date : {today_str} / title : yieldgap / value : {yieldgap}')
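Context for the hunk above: the new code saves each market-index value through an MI object and switches the target series by assigning mi_db.index before calling save_dict, instead of passing index= on every save. A minimal sketch of that usage pattern follows; MISketch, the 'mi' database name, and the localhost address are illustrative assumptions, not the actual analyser_hj3415.db.mongo API.

    import datetime
    from pymongo import MongoClient

    class MISketch:
        # Hypothetical stand-in for mongo.MI: one collection per market-index series,
        # selected through the `index` attribute (assumed behaviour, not the packaged class).
        def __init__(self, client: MongoClient, index: str):
            self.client = client
            self.index = index

        def save_dict(self, mi_dict: dict):
            self.client['mi'][self.index].insert_one(mi_dict)

    client = MongoClient("mongodb://localhost:27017")  # placeholder address
    mi_db = MISketch(client, 'avgper')
    today_str = datetime.datetime.today().strftime('%Y.%m.%d')
    mi_db.save_dict(mi_dict={'date': today_str, 'value': '12.3'})
    mi_db.index = 'yieldgap'  # retarget the collection, as the new code does
    mi_db.save_dict(mi_dict={'date': today_str, 'value': '4.5'})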
Binary file
Binary file
@@ -1,3 +1,4 @@
+ import logging
  import time
  import scrapy
  from util_hj3415 import utils
@@ -46,22 +47,24 @@ class C103Spider(scrapy.Spider, metaclass=ABCMeta):
 
  def parse_c103(self, response, code):
  # Load the page once first and click the buttons to set it up.
- self.setting_page(response.url)
-
- # Extract the table from the html and build a dataframe
- self.driver.get(response.url)
- time.sleep(self.WAIT)
- html = Selector(text=self.driver.page_source)
- table_xpath = '//table[2]'
- df = common.get_df_from_html(html, table_xpath, 1)
- self.logger.debug(df)
-
- # make item to yield
- item = items.C103items()
- item['코드'] = code
- item['title'] = self.title
- item['df'] = df
- yield item
+ if self.setting_page(response.url):
+ # Extract the table from the html and build a dataframe
+ self.driver.get(response.url)
+ time.sleep(self.WAIT)
+ html = Selector(text=self.driver.page_source)
+ table_xpath = '//table[2]'
+ df = common.get_df_from_html(html, table_xpath, 1)
+ self.logger.debug(df)
+
+ # make item to yield
+ item = items.C103items()
+ item['코드'] = code
+ item['title'] = self.title
+ item['df'] = df
+ yield item
+ else:
+ self.logger.warning("Parsing error ... maybe not a valid stock code.")
+
 
  @abstractmethod
  def setting_page(self, url: str):
@@ -90,13 +93,13 @@ class C103BQ(C103Spider):
  def __init__(self, code, mongo_client):
  super(C103BQ, self).__init__(code, mongo_client, title='재무상태표q')
 
- def setting_page(self, url: str):
+ def setting_page(self, url: str) -> bool:
  buttons = [
  ('재무상태표', '//*[@id="rpt_tab2"]'),
  ('분기', '//*[@id="frqTyp1"]'),
  ('검색', '//*[@id="hfinGubun"]'),
  ]
- common.click_buttons(self.driver, url, buttons, self.WAIT)
+ return common.click_buttons(self.driver, url, buttons, self.WAIT)
 
 
  class C103CQ(C103Spider):
@@ -105,13 +108,13 @@ class C103CQ(C103Spider):
  def __init__(self, code, mongo_client):
  super().__init__(code, mongo_client, title='현금흐름표q')
 
- def setting_page(self, url: str):
+ def setting_page(self, url: str) -> bool:
  buttons = [
  ('현금흐름표', '//*[@id="rpt_tab3"]'),
  ('분기', '//*[@id="frqTyp1"]'),
  ('검색', '//*[@id="hfinGubun"]'),
  ]
- common.click_buttons(self.driver, url, buttons, self.WAIT)
+ return common.click_buttons(self.driver, url, buttons, self.WAIT)
 
 
  class C103IQ(C103Spider):
@@ -120,13 +123,13 @@ class C103IQ(C103Spider):
  def __init__(self, code, mongo_client):
  super().__init__(code, mongo_client, title='손익계산서q')
 
- def setting_page(self, url: str):
+ def setting_page(self, url: str) -> bool:
  buttons = [
  ('손익계산서', '//*[@id="rpt_tab1"]'),
  ('분기', '//*[@id="frqTyp1"]'),
  ('검색', '//*[@id="hfinGubun"]'),
  ]
- common.click_buttons(self.driver, url, buttons, self.WAIT)
+ return common.click_buttons(self.driver, url, buttons, self.WAIT)
 
 
  class C103BY(C103Spider):
@@ -135,13 +138,13 @@ class C103BY(C103Spider):
  def __init__(self, code, mongo_client):
  super().__init__(code, mongo_client, title='재무상태표y')
 
- def setting_page(self, url: str):
+ def setting_page(self, url: str) -> bool:
  buttons = [
  ('재무상태표', '//*[@id="rpt_tab2"]'),
  ('연간', '//*[@id="frqTyp0"]'),
  ('검색', '//*[@id="hfinGubun"]'),
  ]
- common.click_buttons(self.driver, url, buttons, self.WAIT)
+ return common.click_buttons(self.driver, url, buttons, self.WAIT)
 
 
  class C103CY(C103Spider):
@@ -150,13 +153,13 @@ class C103CY(C103Spider):
  def __init__(self, code, mongo_client):
  super().__init__(code, mongo_client, title='현금흐름표y')
 
- def setting_page(self, url: str):
+ def setting_page(self, url: str) -> bool:
  buttons = [
  ('현금흐름표', '//*[@id="rpt_tab3"]'),
  ('연간', '//*[@id="frqTyp0"]'),
  ('검색', '//*[@id="hfinGubun"]'),
  ]
- common.click_buttons(self.driver, url, buttons, self.WAIT)
+ return common.click_buttons(self.driver, url, buttons, self.WAIT)
 
 
  class C103IY(C103Spider):
@@ -165,10 +168,10 @@ class C103IY(C103Spider):
  def __init__(self, code, mongo_client):
  super().__init__(code, mongo_client, title='손익계산서y')
 
- def setting_page(self, url: str):
+ def setting_page(self, url: str) -> bool:
  buttons = [
  ('손익계산서', '//*[@id="rpt_tab1"]'),
  ('연간', '//*[@id="frqTyp0"]'),
  ('검색', '//*[@id="hfinGubun"]'),
  ]
- common.click_buttons(self.driver, url, buttons, self.WAIT)
+ return common.click_buttons(self.driver, url, buttons, self.WAIT)
@@ -46,36 +46,37 @@ class C104Spider(scrapy.Spider, metaclass=ABCMeta):
 
  def parse_c104(self, response, code):
  # Load the page once first and click the buttons to set it up.
- self.setting_page(response.url)
-
- # Extract the tables from the html and build dataframes
- self.driver.get(response.url)
- time.sleep(self.WAIT)
- html = Selector(text=self.driver.page_source)
- table_xpath = '//table[@class="gHead01 all-width data-list"]'
-
- # Split the title on '_' to build a list of table names.
- title_list = self.title.split('_')
- self.logger.debug(title_list)
-
- # Build a list of dataframes.
- df_list = []
- for i in range(2):
- # upper table 0, lower table 1
- df_list.append(common.get_df_from_html(html, table_xpath, i))
- self.logger.debug(df_list)
-
- # Pair the table-name list with the df list and yield items to be saved to the database.
- for title, df in list(zip(title_list, df_list)):
- # Log the df.
- self.logger.info(title)
- self.logger.debug(df)
- # make item to yield
- item = items.C104items()
- item['코드'] = code
- item['title'] = title
- item['df'] = df
- yield item
+ if self.setting_page(response.url):
+ # Extract the tables from the html and build dataframes
+ self.driver.get(response.url)
+ time.sleep(self.WAIT)
+ html = Selector(text=self.driver.page_source)
+ table_xpath = '//table[@class="gHead01 all-width data-list"]'
+
+ # Split the title on '_' to build a list of table names.
+ title_list = self.title.split('_')
+ self.logger.debug(title_list)
+
+ # Build a list of dataframes.
+ df_list = []
+ for i in range(2):
+ # upper table 0, lower table 1
+ df_list.append(common.get_df_from_html(html, table_xpath, i))
+ self.logger.debug(df_list)
+
+ # Pair the table-name list with the df list and yield items to be saved to the database.
+ for title, df in list(zip(title_list, df_list)):
+ # Log the df.
+ self.logger.info(title)
+ self.logger.debug(df)
+ # make item to yield
+ item = items.C104items()
+ item['코드'] = code
+ item['title'] = title
+ item['df'] = df
+ yield item
+ else:
+ self.logger.warning("Parsing error ... maybe not a valid stock code.")
 
  @abstractmethod
  def setting_page(self, url: str):
@@ -110,7 +111,7 @@ class C104AQ(C104Spider):
  def __init__(self, code, mongo_client):
  super(C104AQ, self).__init__(code, mongo_client, title='수익성q_가치분석q')
 
- def setting_page(self, url: str):
+ def setting_page(self, url: str) -> bool:
  buttons = [
  ('수익성', '//*[ @id="val_tab1"]'),
  ('분기', '//*[@id="frqTyp1"]'),
@@ -118,7 +119,7 @@ class C104AQ(C104Spider):
  ('가치분석분기', '//*[@id="frqTyp1_2"]'),
  ('가치분석검색', '//*[@id="hfinGubun2"]'),
  ]
- common.click_buttons(self.driver, url, buttons, self.WAIT)
+ return common.click_buttons(self.driver, url, buttons, self.WAIT)
 
 
  class C104BQ(C104Spider):
@@ -127,13 +128,13 @@ class C104BQ(C104Spider):
  def __init__(self, code, mongo_client):
  super(C104BQ, self).__init__(code, mongo_client, title='성장성q')
 
- def setting_page(self, url: str):
+ def setting_page(self, url: str) -> bool:
  buttons = [
  ('성장성', '//*[ @id="val_tab2"]'),
  ('분기', '//*[@id="frqTyp1"]'),
  ('검색', '//*[@id="hfinGubun"]'),
  ]
- common.click_buttons(self.driver, url, buttons, self.WAIT)
+ return common.click_buttons(self.driver, url, buttons, self.WAIT)
 
 
  class C104CQ(C104Spider):
@@ -142,13 +143,13 @@ class C104CQ(C104Spider):
  def __init__(self, code, mongo_client):
  super(C104CQ, self).__init__(code, mongo_client, title='안정성q')
 
- def setting_page(self, url: str):
+ def setting_page(self, url: str) -> bool:
  buttons = [
  ('안정성', '//*[ @id="val_tab3"]'),
  ('분기', '//*[@id="frqTyp1"]'),
  ('검색', '//*[@id="hfinGubun"]'),
  ]
- common.click_buttons(self.driver, url, buttons, self.WAIT)
+ return common.click_buttons(self.driver, url, buttons, self.WAIT)
 
 
  class C104DQ(C104Spider):
@@ -157,13 +158,13 @@ class C104DQ(C104Spider):
  def __init__(self, code, mongo_client):
  super(C104DQ, self).__init__(code, mongo_client, title='활동성q')
 
- def setting_page(self, url: str):
+ def setting_page(self, url: str) -> bool:
  buttons = [
  ('활동성', '//*[ @id="val_tab4"]'),
  ('분기', '//*[@id="frqTyp1"]'),
  ('검색', '//*[@id="hfinGubun"]'),
  ]
- common.click_buttons(self.driver, url, buttons, self.WAIT)
+ return common.click_buttons(self.driver, url, buttons, self.WAIT)
 
 
  class C104AY(C104Spider):
@@ -172,7 +173,7 @@ class C104AY(C104Spider):
  def __init__(self, code, mongo_client):
  super(C104AY, self).__init__(code, mongo_client, title='수익성y_가치분석y')
 
- def setting_page(self, url: str):
+ def setting_page(self, url: str) -> bool:
  buttons = [
  ('수익성', '//*[ @id="val_tab1"]'),
  ('연간', '//*[@id="frqTyp0"]'),
@@ -180,7 +181,7 @@ class C104AY(C104Spider):
  ('가치분석연간', '//*[@id="frqTyp0_2"]'),
  ('가치분석검색', '//*[@id="hfinGubun2"]'),
  ]
- common.click_buttons(self.driver, url, buttons, self.WAIT)
+ return common.click_buttons(self.driver, url, buttons, self.WAIT)
 
 
  class C104BY(C104Spider):
@@ -189,13 +190,13 @@ class C104BY(C104Spider):
  def __init__(self, code, mongo_client):
  super(C104BY, self).__init__(code, mongo_client, title='성장성y')
 
- def setting_page(self, url: str):
+ def setting_page(self, url: str) -> bool:
  buttons = [
  ('성장성', '//*[ @id="val_tab2"]'),
  ('연간', '//*[@id="frqTyp0"]'),
  ('검색', '//*[@id="hfinGubun"]'),
  ]
- common.click_buttons(self.driver, url, buttons, self.WAIT)
+ return common.click_buttons(self.driver, url, buttons, self.WAIT)
 
 
  class C104CY(C104Spider):
@@ -204,13 +205,13 @@ class C104CY(C104Spider):
  def __init__(self, code, mongo_client):
  super(C104CY, self).__init__(code, mongo_client, title='안정성y')
 
- def setting_page(self, url: str):
+ def setting_page(self, url: str) -> bool:
  buttons = [
  ('안정성', '//*[ @id="val_tab3"]'),
  ('연간', '//*[@id="frqTyp0"]'),
  ('검색', '//*[@id="hfinGubun"]'),
  ]
- common.click_buttons(self.driver, url, buttons, self.WAIT)
+ return common.click_buttons(self.driver, url, buttons, self.WAIT)
 
 
  class C104DY(C104Spider):
@@ -219,10 +220,10 @@ class C104DY(C104Spider):
  def __init__(self, code, mongo_client):
  super(C104DY, self).__init__(code, mongo_client, title='활동성y')
 
- def setting_page(self, url: str):
+ def setting_page(self, url: str) -> bool:
  buttons = [
  ('활동성', '//*[ @id="val_tab4"]'),
  ('연간', '//*[@id="frqTyp0"]'),
  ('검색', '//*[@id="hfinGubun"]'),
  ]
- common.click_buttons(self.driver, url, buttons, self.WAIT)
+ return common.click_buttons(self.driver, url, buttons, self.WAIT)
@@ -1,6 +1,7 @@
  import time
  import scrapy
  import pandas as pd
+ import selenium.common.exceptions
  from util_hj3415 import utils
  from scrapy.selector import Selector
 
@@ -37,28 +38,31 @@ class C106Spider(scrapy.Spider):
  self.driver.get(response.url)
  # From experience, a 1-second wait risked data-collection errors.
  time.sleep(self.WAIT)
- html = Selector(text=self.driver.page_source)
+ try:
+ html = Selector(text=self.driver.page_source)
+ except selenium.common.exceptions.UnexpectedAlertPresentException:
+ self.logger.warning("Parsing error ... maybe not a valid stock code.")
+ else:
+ # Get the column names and carry them on the next request.
+ cols = []
+ for i in range(1, 7):
+ pretitle = html.xpath(f'//*[@id="cTB611_h"]/thead/tr/th[{i}]/text()[1]').getall()[0].strip().replace('.','')
+ # Added because an empty value in the index causes an error when saving to the database
+ if pretitle == '':
+ pretitle = 'Unnamed'
+ cols.append(pretitle)
+ self.logger.info(f'Parsing column names - {code} >>>> {cols}')
 
- # Get the column names and carry them on the next request.
- cols = []
- for i in range(1, 7):
- pretitle = html.xpath(f'//*[@id="cTB611_h"]/thead/tr/th[{i}]/text()[1]').getall()[0].strip().replace('.','')
- # Added because an empty value in the index causes an error when saving to the database
- if pretitle == '':
- pretitle = 'Unnamed'
- cols.append(pretitle)
- self.logger.info(f'Parsing column names - {code} >>>> {cols}')
-
- titles = ['y', 'q'] # becomes the table name in the pipeline
- for title in titles:
- # iframe URLs inside C106, one quarterly and one yearly
- # reference from https://docs.scrapy.org/en/latest/topics/request-response.html (chaining requests)
- yield scrapy.Request(
- url=f'https://navercomp.wisereport.co.kr/company/cF6002.aspx?cmp_cd={code}'
- f'&finGubun=MAIN&cmp_cd1=&cmp_cd2=&cmp_cd3=&cmp_cd4=&sec_cd=G453010&frq={title.upper()}',
- callback=self.parse_c106,
- cb_kwargs=dict(code=code, cols=cols, title=title)
- )
+ titles = ['y', 'q'] # becomes the table name in the pipeline
+ for title in titles:
+ # iframe URLs inside C106, one quarterly and one yearly
+ # reference from https://docs.scrapy.org/en/latest/topics/request-response.html (chaining requests)
+ yield scrapy.Request(
+ url=f'https://navercomp.wisereport.co.kr/company/cF6002.aspx?cmp_cd={code}'
+ f'&finGubun=MAIN&cmp_cd1=&cmp_cd2=&cmp_cd3=&cmp_cd4=&sec_cd=G453010&frq={title.upper()}',
+ callback=self.parse_c106,
+ cb_kwargs=dict(code=code, cols=cols, title=title)
+ )
 
  def parse_c106(self, response, code, cols, title):
  df = C106Spider.get_df_from_html(response.text, cols)
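The column names collected in the else: branch are carried into the follow-up iframe requests through cb_kwargs, the Scrapy request-chaining idiom cited in the comment above. A self-contained sketch of that idiom, using a hypothetical spider and example.com URLs rather than the wisereport address:

    import scrapy

    class ChainSketchSpider(scrapy.Spider):
        # Hypothetical spider; only the cb_kwargs chaining pattern matches the diff.
        name = "chain_sketch"
        start_urls = ["https://example.com/company/005930"]

        def parse(self, response):
            cols = ["MarketCap", "PER"]  # data scraped from the first page
            for frq in ("y", "q"):
                yield scrapy.Request(
                    url=f"https://example.com/company/inner?frq={frq.upper()}",
                    callback=self.parse_inner,
                    cb_kwargs=dict(cols=cols, title=frq),  # carried into the next callback
                )

        def parse_inner(self, response, cols, title):
            self.logger.info("got %s with cols=%s", title, cols)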
@@ -1,6 +1,8 @@
  import time
  import pandas as pd
  from typing import Tuple, List
+
+ import selenium.common.exceptions
  from selenium.webdriver.chrome.webdriver import WebDriver
  from selenium.webdriver.common.by import By
 
@@ -25,15 +27,28 @@ def adjust_arg_type(code) -> list:
  raise TypeError
 
 
- def click_buttons(driver: WebDriver, url: str, buttons: List[Tuple[str, str]], wait: float):
- # Takes a buttons list from a subclass and actually clicks the buttons
+ def click_buttons(driver: WebDriver, url: str, buttons: List[Tuple[str, str]], wait: float) -> bool:
+ """
+ Takes a buttons list from a subclass and actually clicks the buttons.
+
+ :param driver:
+ :param url:
+ :param buttons:
+ :param wait:
+ :return: True if the whole routine completed without error
+ """
+ #
  logger.debug(f'*** Setting {url} page by clicking buttons ***')
  driver.get(url)
  for name, xpath in buttons:
  logger.debug(f'- Click the {name} button')
- driver.find_element(By.XPATH, xpath).click()
+ try:
+ driver.find_element(By.XPATH, xpath).click()
+ except selenium.common.exceptions.UnexpectedAlertPresentException:
+ return False
  time.sleep(wait)
  logger.debug('*** Buttons click done ***')
+ return True
 
 
  def get_df_from_html(selector, xpath, table_num):
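Taken together with the spider hunks above, click_buttons now reports whether every click succeeded, and each setting_page override returns that flag so parsing is skipped when the site pops an unexpected alert (typically an invalid stock code). A condensed sketch of the flow, assuming an illustrative 1.5-second wait and a scrape_if_ready wrapper standing in for the spiders' parse methods:

    import logging
    import time
    from typing import List, Tuple

    import selenium.common.exceptions
    from selenium.webdriver.chrome.webdriver import WebDriver
    from selenium.webdriver.common.by import By

    logger = logging.getLogger(__name__)

    def click_buttons(driver: WebDriver, url: str, buttons: List[Tuple[str, str]], wait: float) -> bool:
        # Click each (name, xpath) button; bail out with False on an unexpected alert.
        driver.get(url)
        for name, xpath in buttons:
            try:
                driver.find_element(By.XPATH, xpath).click()
            except selenium.common.exceptions.UnexpectedAlertPresentException:
                return False
            time.sleep(wait)
        return True

    def scrape_if_ready(driver: WebDriver, url: str) -> None:
        buttons = [('재무상태표', '//*[@id="rpt_tab2"]'), ('분기', '//*[@id="frqTyp1"]')]
        if click_buttons(driver, url, buttons, wait=1.5):
            logger.info("Page ready; parse tables from driver.page_source here.")
        else:
            logger.warning("Page setup failed; skipping this code.")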
@@ -3,7 +3,7 @@ import sys
  import time
 
  from scrapy.crawler import CrawlerProcess
- from multiprocessing import Process, cpu_count
+ from multiprocessing import Process
  from scrapy.utils.project import get_project_settings
  from analyser_hj3415.db import mongo
 
@@ -65,37 +65,6 @@ def _run_scrapy(spider: str, codes: list, mongo_addr: str):
  mongo_client.close()
 
 
- def _code_divider(entire_codes: list) -> tuple:
- """
- Given the full list of stock codes, splits it to match the number of cpu cores.
- reference from https://stackoverflow.com/questions/19086106/how-to-utilize-all-cores-with-python-multiprocessing
- :param entire_codes:
- :return:
- """
-
- def _split_list(alist, wanted_parts=1):
- """
- Splits the list into the number of parts used for multiprocessing.
- reference from https://www.it-swarm.dev/ko/python/%EB%8D%94-%EC%9E%91%EC%9D%80-%EB%AA%A9%EB%A1%9D%EC%9C%BC%EB%
- A1%9C-%EB%B6%84%ED%95%A0-%EB%B0%98%EC%9C%BC%EB%A1%9C-%EB%B6%84%ED%95%A0/957910776/
- :param alist:
- :param wanted_parts:
- :return:
- """
- length = len(alist)
- return [alist[i * length // wanted_parts: (i + 1) * length // wanted_parts]
- for i in range(wanted_parts)]
-
- core = cpu_count()
- print(f'Get number of core for multiprocessing : {core}')
- n = core - 1
- if len(entire_codes) < n:
- n = len(entire_codes)
- print(f'Split total {len(entire_codes)} codes by {n} parts ...')
- divided_list = _split_list(entire_codes, wanted_parts=n)
- return n, divided_list
-
-
  def _mp_c10168(spider: str, codes: list, mongo_addr: str):
  """
  Split all the codes across the cpu cores and run them with multiprocessing
@@ -116,7 +85,7 @@ def _mp_c10168(spider: str, codes: list, mongo_addr: str):
  print('*' * 25, f"Scrape multiprocess {spider.capitalize()}", '*' * 25)
  print(f'Total {len(codes)} items..')
  logger.info(codes)
- n, divided_list = _code_divider(codes)
+ n, divided_list = utils.code_divider_by_cpu_core(codes)
 
  start_time = time.time()
  ths = []
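The local _code_divider removed above is now called as utils.code_divider_by_cpu_core from util-hj3415 (hence the util-hj3415>=0.9.4 bump in the metadata below). Its implementation is not part of this diff; the sketch below simply reproduces the removed helper's behaviour under the assumption that the replacement splits codes the same way:

    from multiprocessing import cpu_count

    def code_divider_sketch(entire_codes: list) -> tuple:
        # Mirrors the removed _code_divider: use cpu_count() - 1 chunks,
        # but never more chunks than there are codes.
        def split_list(alist, wanted_parts=1):
            length = len(alist)
            return [alist[i * length // wanted_parts: (i + 1) * length // wanted_parts]
                    for i in range(wanted_parts)]

        n = cpu_count() - 1
        if len(entire_codes) < n:
            n = len(entire_codes)
        return n, split_list(entire_codes, wanted_parts=n)

    # On a machine with at least four cores, three codes split into three single-item chunks:
    # code_divider_sketch(['005930', '000660', '035420']) -> (3, [['005930'], ['000660'], ['035420']])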
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: scraper2_hj3415
- Version: 0.2.1
+ Version: 0.2.5
  Summary: Gathering the stock data
  Project-URL: Homepage, https://pypi.org/project/scraper2_hj3415/
  Project-URL: Bug Tracker, https://pypi.org/project/scraper2_hj3415/
@@ -16,7 +16,7 @@ Requires-Dist: pandas>=1.5.3
  Requires-Dist: scrapy>=2.8.0
  Requires-Dist: selenium>=4.8.2
  Requires-Dist: sqlalchemy>=2.0.4
- Requires-Dist: util-hj3415>=0.9.1
+ Requires-Dist: util-hj3415>=0.9.4
  Description-Content-Type: text/markdown
 
  # scraper2_hj3415 Package
@@ -1,8 +1,10 @@
+ scraper2_hj3415/.DS_Store,sha256=cDL3zE-LYtmbxAQBGMgkUf7QwURK7hSJJCXBwQB4Wuw,6148
  scraper2_hj3415/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  scraper2_hj3415/cli.py,sha256=B2IPg2Sa5WIYKh4gfoc1U3UclUPoew092KO48rEPymw,2251
  scraper2_hj3415/krx/krx.db,sha256=Mrj2Y2b6v32-TxvWf0eNrQQ9tsYjZhm6EduTw6byq7s,458752
  scraper2_hj3415/krx/krx.py,sha256=uhht6u5kDKfeLrbf0n2EwvMXRXZ6yufrLdJnDvU8rRk,8621
- scraper2_hj3415/miscrapy/run.py,sha256=HqK564kHFiMQqlfxNkjru5q1h2b7OBWEtsth4Ll7YYw,5997
+ scraper2_hj3415/miscrapy/.DS_Store,sha256=UeGIuQDMXpgNhrcZMAzyyMOPYWS2zFo9PQM6ROeL_Ko,6148
+ scraper2_hj3415/miscrapy/run.py,sha256=X-cm-a8tCMrjHvb2CR9l2cl4SRv3DO5EV4ga1ebs8Fk,5967
  scraper2_hj3415/miscrapy/scrapy.cfg,sha256=KvMPqIfrkMIydPYMPb0fN9m_IBpr61N7ctwCwudcF2s,247
  scraper2_hj3415/miscrapy/mi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  scraper2_hj3415/miscrapy/mi/items.py,sha256=g6qALzFirk5Y1N3XGF3SaeLkbzzTebW782uqLKc3Z70,124
@@ -22,23 +24,26 @@ scraper2_hj3415/miscrapy/mi/spiders/sp500.py,sha256=oOaBdGhnggg-laQ8aJp5sTvbYwjC
  scraper2_hj3415/miscrapy/mi/spiders/usdidx.py,sha256=TQ0zSxg7xpFiudhsgyjiJ38G3DMCkVAVaDYEnIU_H3k,1074
  scraper2_hj3415/miscrapy/mi/spiders/usdkrw.py,sha256=BmLmOEK4HogPFZ1XOFXB_RhDyAO2vjRVC6a1bNnLbxQ,963
  scraper2_hj3415/miscrapy/mi/spiders/wti.py,sha256=Yy49k-uzpUBpAu-zXhFLCWp_ZEqWXiIPXtJK3PvQUGQ,1058
+ scraper2_hj3415/nfscrapy/.DS_Store,sha256=IFfA9YTtUu0j75YHZ4K-InafK5PcbI7dJvZSymK5PQU,6148
  scraper2_hj3415/nfscrapy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- scraper2_hj3415/nfscrapy/run.py,sha256=z37lshcdWSNWbsGG7pXtRb-OEk7BR71ukmeHbqrZp00,7444
+ scraper2_hj3415/nfscrapy/run.py,sha256=EyVgW7h6FMdiXeYou3Jt7XIwISJ93qHFiACHWyAfzr4,6222
  scraper2_hj3415/nfscrapy/scrapy.cfg,sha256=yCkEgpzAwc9NWjYGaEUelGdLg3mUuuQF1Zl0k5vITH8,260
+ scraper2_hj3415/nfscrapy/nfs/.DS_Store,sha256=UmDncKO3_s-mRcSWiV4oRmyeZhMcn3NCHuSKduga3KU,6148
  scraper2_hj3415/nfscrapy/nfs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  scraper2_hj3415/nfscrapy/nfs/items.py,sha256=n9xDKoGez8oRmsRLSDjjPhLCXgeJGUsKydjWc_gzgbk,1479
  scraper2_hj3415/nfscrapy/nfs/middlewares.py,sha256=F3kL0OJMsUhiw2mPIxQLGvY3wdMxIsQl1BG2uwo_ZHo,3745
  scraper2_hj3415/nfscrapy/nfs/pipelines.py,sha256=iC46Ujkt0L0n4IW8NFH8eMYXy-x-i1KcmUnIhqQRa5s,4874
  scraper2_hj3415/nfscrapy/nfs/settings.py,sha256=BkOy913N9K8Ypwuj0GiZ0zAI_WnBcjJzwjgnZOS_jqw,3472
+ scraper2_hj3415/nfscrapy/nfs/spiders/.DS_Store,sha256=4bCeSNMYj0-03QEqosRz4reLOvZqDbpHuKeCpjNSZ4g,6148
  scraper2_hj3415/nfscrapy/nfs/spiders/__init__.py,sha256=ULwecZkx3_NTphkz7y_qiazBeUoHFnCCWnKSjoDCZj0,161
  scraper2_hj3415/nfscrapy/nfs/spiders/_c108.py,sha256=gOgWQ7qTOOBEZQTY79K5MWzjmz-ZpFg5wrqBhzLoXjI,4776
  scraper2_hj3415/nfscrapy/nfs/spiders/c101.py,sha256=-b_bC0jS_MgDP9h9n2MFHRq-wDx1CGWsgWj6-KQKnpc,3931
- scraper2_hj3415/nfscrapy/nfs/spiders/c103.py,sha256=5E7jZX5UIASCtqGa0eikJVcwlKQsveT-4nyTN6TXGkQ,5614
- scraper2_hj3415/nfscrapy/nfs/spiders/c104.py,sha256=usz29mBeXiQrv_e7uDf2ZihbENVsafEUHUJw8aHRwlw,7645
- scraper2_hj3415/nfscrapy/nfs/spiders/c106.py,sha256=cMbpSLqG4EanKzh6rjpFLAnECt_zLR34MTZo34xu6Go,3783
- scraper2_hj3415/nfscrapy/nfs/spiders/common.py,sha256=QTVWFF386cvoN2czFJQpTeE5jRgnlSE_Z-7y13tPyi8,2876
- scraper2_hj3415-0.2.1.dist-info/METADATA,sha256=XHlnu0dqIGw6s-iaWvXyq5vREG1tCoH41Ve_2fsPSLg,1303
- scraper2_hj3415-0.2.1.dist-info/WHEEL,sha256=Fd6mP6ydyRguakwUJ05oBE7fh2IPxgtDN9IwHJ9OqJQ,87
- scraper2_hj3415-0.2.1.dist-info/entry_points.txt,sha256=1bGP38AtuY6n2FcP_fLmpqGtFmFf8NLJWsCSWoTjF-0,60
- scraper2_hj3415-0.2.1.dist-info/licenses/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
- scraper2_hj3415-0.2.1.dist-info/RECORD,,
+ scraper2_hj3415/nfscrapy/nfs/spiders/c103.py,sha256=OUcieEOQhD4ttWwDNEFX2F65KtCHe6TOhh3slXuYqhE,5884
+ scraper2_hj3415/nfscrapy/nfs/spiders/c104.py,sha256=ugF9yEKtAfZgLDqmX7Y-y1aoIfUR81Txouda27x1Mro,7975
+ scraper2_hj3415/nfscrapy/nfs/spiders/c106.py,sha256=SRm6tJiWOCZrxvHF5LnaAXzK2Ui1U80D65JGRaGRNEM,4094
+ scraper2_hj3415/nfscrapy/nfs/spiders/common.py,sha256=Y4ENlLTuQAQjPX3z1Xco7HvFdcIUByHohs4lYMjw69Q,3218
+ scraper2_hj3415-0.2.5.dist-info/METADATA,sha256=3Am9xsWTglARO3YFF7KsVao33FieXHoJ812UIXNsrQ0,1303
+ scraper2_hj3415-0.2.5.dist-info/WHEEL,sha256=Fd6mP6ydyRguakwUJ05oBE7fh2IPxgtDN9IwHJ9OqJQ,87
+ scraper2_hj3415-0.2.5.dist-info/entry_points.txt,sha256=1bGP38AtuY6n2FcP_fLmpqGtFmFf8NLJWsCSWoTjF-0,60
+ scraper2_hj3415-0.2.5.dist-info/licenses/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
+ scraper2_hj3415-0.2.5.dist-info/RECORD,,