scraper2_hj3415-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
File without changes
File without changes
scraper2_hj3415/nfscrapy/nfs/items.py
@@ -0,0 +1,34 @@
+ # Define here the models for your scraped items
+ #
+ # See documentation in:
+ # https://docs.scrapy.org/en/latest/topics/items.html
+
+ import scrapy
+
+
+ class C101items(scrapy.Item):
+     date = scrapy.Field()
+     종목명 = scrapy.Field()
+     코드 = scrapy.Field()
+     업종 = scrapy.Field()
+     EPS = scrapy.Field()  # earnings per share; EPS * PER = share price
+     BPS = scrapy.Field()  # book value per share
+     PER = scrapy.Field()  # how many multiples of net income the company trades at
+     업종PER = scrapy.Field()
+     PBR = scrapy.Field()
+     배당수익률 = scrapy.Field()
+
+     주가 = scrapy.Field()
+     최고52주 = scrapy.Field()
+     최저52주 = scrapy.Field()
+     거래량 = scrapy.Field()
+     거래대금 = scrapy.Field()
+     시가총액 = scrapy.Field()
+     베타52주 = scrapy.Field()
+     발행주식 = scrapy.Field()
+     유통비율 = scrapy.Field()
+
+     # company overview section
+     intro1 = scrapy.Field()
+     intro2 = scrapy.Field()
+     intro3 = scrapy.Field()
scraper2_hj3415/nfscrapy/nfs/middlewares.py
@@ -0,0 +1,103 @@
+ # Define here the models for your spider middleware
+ #
+ # See documentation in:
+ # https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+
+ from scrapy import signals
+
+ # useful for handling different item types with a single interface
+ from itemadapter import is_item, ItemAdapter
+
+
+ class NfsSpiderMiddleware:
+     # Not all methods need to be defined. If a method is not defined,
+     # scrapy acts as if the spider middleware does not modify the
+     # passed objects.
+
+     @classmethod
+     def from_crawler(cls, crawler):
+         # This method is used by Scrapy to create your spiders.
+         s = cls()
+         crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
+         return s
+
+     def process_spider_input(self, response, spider):
+         # Called for each response that goes through the spider
+         # middleware and into the spider.
+
+         # Should return None or raise an exception.
+         return None
+
+     def process_spider_output(self, response, result, spider):
+         # Called with the results returned from the Spider, after
+         # it has processed the response.
+
+         # Must return an iterable of Request, or item objects.
+         for i in result:
+             yield i
+
+     def process_spider_exception(self, response, exception, spider):
+         # Called when a spider or process_spider_input() method
+         # (from other spider middleware) raises an exception.
+
+         # Should return either None or an iterable of Request or item objects.
+         pass
+
+     def process_start_requests(self, start_requests, spider):
+         # Called with the start requests of the spider, and works
+         # similarly to the process_spider_output() method, except
+         # that it doesn't have a response associated.
+
+         # Must return only requests (not items).
+         for r in start_requests:
+             yield r
+
+     def spider_opened(self, spider):
+         spider.logger.info("Spider opened: %s" % spider.name)
+
+
+ class NfsDownloaderMiddleware:
+     # Not all methods need to be defined. If a method is not defined,
+     # scrapy acts as if the downloader middleware does not modify the
+     # passed objects.
+
+     @classmethod
+     def from_crawler(cls, crawler):
+         # This method is used by Scrapy to create your spiders.
+         s = cls()
+         crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
+         return s
+
+     def process_request(self, request, spider):
+         # Called for each request that goes through the downloader
+         # middleware.
+
+         # Must either:
+         # - return None: continue processing this request
+         # - or return a Response object
+         # - or return a Request object
+         # - or raise IgnoreRequest: process_exception() methods of
+         #   installed downloader middleware will be called
+         return None
+
+     def process_response(self, request, response, spider):
+         # Called with the response returned from the downloader.
+
+         # Must either:
+         # - return a Response object
+         # - return a Request object
+         # - or raise IgnoreRequest
+         return response
+
+     def process_exception(self, request, exception, spider):
+         # Called when a download handler or a process_request()
+         # (from other downloader middleware) raises an exception.
+
+         # Must either:
+         # - return None: continue processing this exception
+         # - return a Response object: stops process_exception() chain
+         # - return a Request object: stops process_exception() chain
+         pass
+
+     def spider_opened(self, spider):
+         spider.logger.info("Spider opened: %s" % spider.name)
scraper2_hj3415/nfscrapy/nfs/pipelines.py
@@ -0,0 +1,26 @@
+ from itemadapter import ItemAdapter
+ from pymongo import errors
+
+
+ class MongoPipeline:
+
+     collection_name = 'c101'
+
+     def open_spider(self, spider):
+         self.client = spider.mongo_client
+
+     def process_item(self, item, spider):
+         print("In the MongoPipeline...", end="")
+         if self.client is None:
+             print("Skip the save to mongoDB.")
+         else:
+             print("Save data to mongoDB.")
+             my_collection = self.client[item['코드']][self.collection_name]
+             try:
+                 my_collection.create_index('date', unique=True)
+                 my_collection.insert_one(ItemAdapter(item).asdict())
+             except errors.DuplicateKeyError:
+                 # if the scraped data is already in the database, delete it and save it again
+                 my_collection.delete_many({'date': {"$gte": item['date']}})
+                 my_collection.insert_one(ItemAdapter(item).asdict())
+         return item
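
For reference, a minimal sketch of how the documents written by MongoPipeline could be read back, assuming a locally reachable MongoDB server (the address below is hypothetical) and the sample ticker code 005930 used elsewhere in this package; per the pipeline code, the database name comes from item['코드'], the collection name is 'c101', and 'date' carries a unique index:

    import pymongo

    # hypothetical local server; the pipeline itself receives its client from the spider
    client = pymongo.MongoClient('mongodb://localhost:27017')
    # one database per ticker code, one 'c101' collection per database
    latest = client['005930']['c101'].find_one(sort=[('date', -1)])
    print(latest)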
scraper2_hj3415/nfscrapy/nfs/settings.py
@@ -0,0 +1,100 @@
+ # Scrapy settings for nfs project
+ #
+ # For simplicity, this file contains only settings considered important or
+ # commonly used. You can find more settings consulting the documentation:
+ #
+ # https://docs.scrapy.org/en/latest/topics/settings.html
+ # https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
+ # https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+
+ BOT_NAME = "nfs"
+
+ SPIDER_MODULES = ["nfs.spiders"]
+ NEWSPIDER_MODULE = "nfs.spiders"
+
+
+ # Crawl responsibly by identifying yourself (and your website) on the user-agent
+ #USER_AGENT = "nfs (+http://www.yourdomain.com)"
+
+ # Obey robots.txt rules
+ ROBOTSTXT_OBEY = False
+
+ # Configure maximum concurrent requests performed by Scrapy (default: 16)
+ #CONCURRENT_REQUESTS = 32
+
+ # Configure a delay for requests for the same website (default: 0)
+ # See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
+ # See also autothrottle settings and docs
+ DOWNLOAD_DELAY = 3
+ RANDOMIZE_DOWNLOAD_DELAY = True
+
+ # The download delay setting will honor only one of:
+ #CONCURRENT_REQUESTS_PER_DOMAIN = 16
+ #CONCURRENT_REQUESTS_PER_IP = 16
+
+ # Disable cookies (enabled by default)
+ COOKIES_ENABLED = False
+
+ # Disable Telnet Console (enabled by default)
+ #TELNETCONSOLE_ENABLED = False
+
+ # Override the default request headers:
+ #DEFAULT_REQUEST_HEADERS = {
+ #    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+ #    "Accept-Language": "en",
+ #}
+
+ # Enable or disable spider middlewares
+ # See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+ #SPIDER_MIDDLEWARES = {
+ #    "nfs.middlewares.NfsSpiderMiddleware": 543,
+ #}
+
+ # Enable or disable downloader middlewares
+ # See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
+ #DOWNLOADER_MIDDLEWARES = {
+ #    "nfs.middlewares.NfsDownloaderMiddleware": 543,
+ #}
+
+ # Enable or disable extensions
+ # See https://docs.scrapy.org/en/latest/topics/extensions.html
+ #EXTENSIONS = {
+ #    "scrapy.extensions.telnet.TelnetConsole": None,
+ #}
+
+ # Configure item pipelines
+ # See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
+ ITEM_PIPELINES = {
+     "nfs.pipelines.MongoPipeline": 300,
+ }
+
+ # Enable and configure the AutoThrottle extension (disabled by default)
+ # See https://docs.scrapy.org/en/latest/topics/autothrottle.html
+ #AUTOTHROTTLE_ENABLED = True
+ # The initial download delay
+ #AUTOTHROTTLE_START_DELAY = 5
+ # The maximum download delay to be set in case of high latencies
+ #AUTOTHROTTLE_MAX_DELAY = 60
+ # The average number of requests Scrapy should be sending in parallel to
+ # each remote server
+ #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
+ # Enable showing throttling stats for every response received:
+ #AUTOTHROTTLE_DEBUG = False
+
+ # Enable and configure HTTP caching (disabled by default)
+ # See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
+ #HTTPCACHE_ENABLED = True
+ #HTTPCACHE_EXPIRATION_SECS = 0
+ #HTTPCACHE_DIR = "httpcache"
+ #HTTPCACHE_IGNORE_HTTP_CODES = []
+ #HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage"
+
+ # Set settings whose default value is deprecated to a future-proof value
+ REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"
+ TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
+ FEED_EXPORT_ENCODING = "utf-8"
+
+
+
+ LOG_ENABLED = True
+ LOG_LEVEL = 'WARNING'
scraper2_hj3415/nfscrapy/nfs/spiders/__init__.py
@@ -0,0 +1,4 @@
+ # This package will contain the spiders of your Scrapy project
+ #
+ # Please refer to the documentation for information on how to create and manage
+ # your spiders.
scraper2_hj3415/nfscrapy/nfs/spiders/c101.py
@@ -0,0 +1,74 @@
+ import scrapy
+ from nfs import items
+ #from nfs.spiders import common
+
+ # cmd usage : scrapy crawl c101 -a codes=005930
+
+ # lambda helpers that strip or replace miscellaneous symbols in the scraped strings
+ cleaning = lambda s: (
+     s.strip().replace('\t', '').replace('\r', '').replace('\n', '').replace(',', '').replace('원', '')
+     .replace('주', '').replace('억', '00000000').replace('%', '') if s is not None and s != 'N/A' else None
+ )
+ css_path = lambda i, j: f'#pArea>div.wrapper-table>div>table>tr:nth-child({i})>td>dl>dt:nth-child({j})>'
+ css_path2 = lambda i: f'#cTB11>tbody>tr:nth-child({i})>'
+ str_or_blank = lambda i: '' if i is None else str(i)
+
+
+ class C101Spider(scrapy.Spider):
+     name = 'c101'
+     allowed_domains = ['navercomp.wisereport.co.kr']  # https address
+
+     def __init__(self, codes, mongo_client, *args, **kwargs):
+         super(C101Spider, self).__init__(*args, **kwargs)
+         # command-line arguments always arrive as strings, so a single code must be wrapped in a list
+         if isinstance(codes, str):
+             self.codes = [codes, ]
+         elif isinstance(codes, list):
+             self.codes = codes
+         else:
+             raise TypeError
+         self.mongo_client = mongo_client
+
+     def start_requests(self):
+         total_count = len(self.codes)
+         print(f'Start scraping {self.name}, {total_count} items...')
+         self.logger.info(f'entire codes list - {self.codes}')
+         for i, one_code in enumerate(self.codes):
+             print(f'{i + 1}/{total_count}. Parsing {self.name}...{one_code}')
+             # reference from https://docs.scrapy.org/en/latest/topics/request-response.html
+             yield scrapy.Request(url=f'https://navercomp.wisereport.co.kr/v2/company/c1010001.aspx?cmp_cd={one_code}',
+                                  callback=self.parse_c101,
+                                  cb_kwargs=dict(code=one_code),
+                                  )
+
+     def parse_c101(self, response, code):
+         self.logger.debug(response.text)
+         item = items.C101items()
+         try:
+             item['date'] = response.xpath('//*[@id="wrapper"]/div[1]/div[1]/div[1]/dl/dd[2]/p/text()')\
+                 .get().replace('[기준:', '').replace(']', '')
+         except AttributeError:
+             self.logger.error(f'ERROR : Page not found...{code}')
+             return None
+         item['종목명'] = response.css(css_path(1, 1) + 'span::text').get()
+         item['코드'] = cleaning(response.css(css_path(1, 1) + 'b::text').get())
+         item['업종'] = response.css(css_path(1, 4).rstrip('>') + '::text').get().replace('WICS : ', '')
+         item['EPS'] = cleaning(response.css(css_path(3, 1) + 'b::text').get())
+         item['BPS'] = cleaning(response.css(css_path(3, 2) + 'b::text').get())
+         item['PER'] = cleaning(response.css(css_path(3, 3) + 'b::text').get())
+         item['업종PER'] = cleaning(response.css(css_path(3, 4) + 'b::text').get())
+         item['PBR'] = cleaning(response.css(css_path(3, 5) + 'b::text').get())
+         item['배당수익률'] = cleaning(response.css(css_path(3, 6) + 'b::text').get())
+         item['주가'] = cleaning(response.css(css_path2(1) + 'td>strong::text').get())
+         item['최고52주'], item['최저52주'] = map(cleaning, response.css(css_path2(2) + 'td::text').get().split('/'))
+         item['거래량'], item['거래대금'] = map(cleaning, response.css(css_path2(4) + 'td::text').get().split('/'))
+         item['시가총액'] = cleaning(response.css(css_path2(5) + 'td::text').get())
+         item['베타52주'] = cleaning(response.css(css_path2(6) + 'td::text').get())
+         item['발행주식'], item['유통비율'] = map(cleaning, response.css(css_path2(7) + 'td::text').get().split('/'))
+
+         item['intro1'] = str_or_blank(response.xpath('//*[@id="wrapper"]/div[5]/div[2]/ul/li[1]/text()').get())
+         item['intro2'] = str_or_blank(response.xpath('//*[@id="wrapper"]/div[5]/div[2]/ul/li[2]/text()').get())
+         item['intro3'] = str_or_blank(response.xpath('//*[@id="wrapper"]/div[5]/div[2]/ul/li[3]/text()').get())
+
+         self.logger.info(item)
+         yield item
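
A quick sketch of how the cleaning lambda above normalizes scraped strings; the sample inputs are hypothetical, but the outputs follow directly from the chain of replace calls:

    # the lambda is copied verbatim from the spider above
    cleaning = lambda s: (
        s.strip().replace('\t', '').replace('\r', '').replace('\n', '').replace(',', '').replace('원', '')
        .replace('주', '').replace('억', '00000000').replace('%', '') if s is not None and s != 'N/A' else None
    )

    print(cleaning(' 70,600원\n'))  # -> '70600'
    print(cleaning('1,234억'))      # -> '123400000000'
    print(cleaning('N/A'))          # -> None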
scraper2_hj3415/nfscrapy/run.py
@@ -0,0 +1,160 @@
+ import os
+ import time
+ import pymongo
+
+ from scrapy.crawler import CrawlerProcess
+ from multiprocessing import Process, cpu_count
+ from scrapy.utils.project import get_project_settings
+
+ from util_hj3415 import utils
+
+ import logging
+ logger = logging.getLogger(__name__)
+ formatter = logging.Formatter('%(levelname)s: [%(name)s] %(message)s')
+ ch = logging.StreamHandler()
+ ch.setFormatter(formatter)
+ logger.addHandler(ch)
+ logger.setLevel(logging.ERROR)
+
+
+ def chcwd(func):
+     """
+     Decorator that temporarily changes the current working directory, because scrapy must always be run from inside its project directory.
+     :param func:
+     :return:
+     """
+     def wrapper(*args, **kwargs):
+         before_cwd = os.getcwd()
+         logger.info(f'current path : {before_cwd}')
+         after_cwd = os.path.dirname(os.path.realpath(__file__))
+         logger.info(f'change path to {after_cwd}')
+         os.chdir(after_cwd)
+         func(*args, **kwargs)
+         logger.info(f'restore path to {before_cwd}')
+         os.chdir(before_cwd)
+     return wrapper
+
+
+ def _run_scrapy(spider: str, codes: list, mongo_addr: str):
+     """
+     Runs a scrapy spider from a script.
+     :param spider:
+     :param codes:
+     :param mongo_addr:
+     :return:
+     """
+     if mongo_addr == "":
+         mongo_client = None
+     else:
+         mongo_client = connect_mongo(mongo_addr)
+
+     process = CrawlerProcess(get_project_settings())
+     process.crawl(spider, codes=codes, mongo_client=mongo_client)
+     process.start()
+
+     if mongo_client is not None:
+         mongo_client.close()
+
+
+ def _code_divider(entire_codes: list) -> tuple:
+     """
+     Splits the full list of ticker codes into parts matching the number of CPU cores.
+     reference from https://stackoverflow.com/questions/19086106/how-to-utilize-all-cores-with-python-multiprocessing
+     :param entire_codes:
+     :return:
+     """
+     def _split_list(alist, wanted_parts=1):
+         """
+         Splits a list into the number of parts used for multiprocessing.
+         reference from https://www.it-swarm.dev/ko/python/%EB%8D%94-%EC%9E%91%EC%9D%80-%EB%AA%A9%EB%A1%9D%EC%9C%BC%EB%
+         A1%9C-%EB%B6%84%ED%95%A0-%EB%B0%98%EC%9C%BC%EB%A1%9C-%EB%B6%84%ED%95%A0/957910776/
+         :param alist:
+         :param wanted_parts:
+         :return:
+         """
+         length = len(alist)
+         return [alist[i * length // wanted_parts: (i + 1) * length // wanted_parts]
+                 for i in range(wanted_parts)]
+
+     core = cpu_count()
+     print(f'Get number of core for multiprocessing : {core}')
+     n = core - 1
+     if len(entire_codes) < n:
+         n = len(entire_codes)
+     print(f'Split total {len(entire_codes)} codes by {n} parts ...')
+     divided_list = _split_list(entire_codes, wanted_parts=n)
+     return n, divided_list
+
+
+ def _mp_c10168(page: str, codes: list, mongo_addr: str):
+     """
+     Splits the full code list across CPU cores and runs the scraping with multiprocessing.
+     reference from https://monkey3199.github.io/develop/python/2018/12/04/python-pararrel.html
+
+     Creating a MongoClient up front and passing it into the processes raises errors, so each process must create its own client.
+     referred from https://blog.naver.com/PostView.nhn?blogId=stop2y&logNo=222211823932&categoryNo=136&parentCategoryNo=
+     0&viewDate=&currentPage=1&postListTopCurrentPage=1&from=postView
+
+     :param page:
+     :param codes:
+     :param mongo_addr:
+     :return:
+     """
+
+     if page not in ('c101', 'c106', 'c108'):
+         raise NameError
+     print('*' * 25, f"Scrape multiprocess {page.capitalize()}", '*' * 25)
+     print(f'Total {len(codes)} items..')
+     logger.info(codes)
+     n, divided_list = _code_divider(codes)
+
+     start_time = time.time()
+     ths = []
+     error = False
+     for i in range(n):
+         ths.append(Process(target=_run_scrapy, args=(page, divided_list[i], mongo_addr)))
+     for i in range(n):
+         ths[i].start()
+     for i in range(n):
+         ths[i].join()
+     print(f'Total spent time : {round(time.time() - start_time, 2)} sec.')
+
+
+ def connect_mongo(addr: str, timeout=5) -> pymongo.MongoClient:
+     """
+     Creates a MongoDB client.
+     Factored out because handling the database with a single client is more efficient than creating a new client every time one is needed.
+     resolve conn error - https://stackoverflow.com/questions/54484890/ssl-handshake-issue-with-pymongo-on-python3
+     :param addr:
+     :param timeout:
+     :return:
+     """
+     import certifi
+     ca = certifi.where()
+     if addr.startswith('mongodb://'):
+         # set a some-second connection timeout
+         client = pymongo.MongoClient(addr, serverSelectionTimeoutMS=timeout * 1000)
+     elif addr.startswith('mongodb+srv://'):
+         client = pymongo.MongoClient(addr, serverSelectionTimeoutMS=timeout * 1000, tlsCAFile=ca)
+     else:
+         raise Exception(f"Invalid address: {addr}")
+     try:
+         srv_info = client.server_info()
+         conn_str = f"Connect to Mongo Atlas v{srv_info['version']}..."
+         print(conn_str, f"Server Addr : {addr}")
+         return client
+     except Exception:
+         conn_str = f"Unable to connect to the server.(MY IP : {utils.get_ip_addr()})"
+         raise Exception(f"{conn_str} Server Addr : {addr}")
+
+
+ @chcwd
+ def c101(codes: list, mongo_addr: str = ""):
+     """
+     Entry point for running the c101 scrape from outside the package.
+     :param codes: list of ticker codes
+     :param mongo_addr: MongoDB URI - mongodb://...
+     :return:
+     """
+     _mp_c10168('c101', codes=codes, mongo_addr=mongo_addr)
+
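
As a usage note, run.py is the script-level entry point; a minimal sketch of calling it, assuming the import path implied by the RECORD file below (the ticker code is the one from the spider's usage comment, and an empty mongo_addr skips the MongoDB save):

    from scraper2_hj3415.nfscrapy import run

    # scrape the c101 page for a single ticker; pass a real MongoDB URI to persist the result
    run.c101(['005930'], mongo_addr='')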
scraper2_hj3415/nfscrapy/scrapy.cfg
@@ -0,0 +1,11 @@
+ # Automatically created by: scrapy startproject
+ #
+ # For more information about the [deploy] section see:
+ # https://scrapyd.readthedocs.io/en/latest/deploy.html
+
+ [settings]
+ default = nfs.settings
+
+ [deploy]
+ #url = http://localhost:6800/
+ project = nfs
scraper2_hj3415-0.0.1.dist-info/METADATA
@@ -0,0 +1,26 @@
+ Metadata-Version: 2.1
+ Name: scraper2_hj3415
+ Version: 0.0.1
+ Summary: Gathering the stock data
+ Project-URL: Homepage, https://pypi.org/project/scraper2_hj3415/
+ Project-URL: Bug Tracker, https://pypi.org/project/scraper2_hj3415/
+ Author-email: Hyungjin Kim <hj3415@gmail.com>
+ License-File: LICENSE
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python :: 3
+ Requires-Python: >=3.7
+ Description-Content-Type: text/markdown
+
+ # scraper2_hj3415 Package
+
+ This is a simple example package. You can use
+ [Github-flavored Markdown](https://guides.github.com/features/mastering-markdown/)
+ to write your content.
+
+ When creating a new scrapy project, the outermost project folder must be named differently from the package folder inside it; otherwise scrapy raises an error saying it cannot find the settings file.
+ (e.g. nfscrapy/nfs - the project name is nfs and the outer folder is renamed to nfscrapy)
+
+ [Tutorials - How to packaging](https://packaging.python.org/en/latest/tutorials/packaging-projects/)
+
+
scraper2_hj3415-0.0.1.dist-info/RECORD
@@ -0,0 +1,15 @@
+ scraper2_hj3415/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/nfscrapy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/nfscrapy/run.py,sha256=G4UVhG0LYEdljiC7hbKCsiFVHSixmU3cOQEI0pM6Xso,5768
+ scraper2_hj3415/nfscrapy/scrapy.cfg,sha256=yCkEgpzAwc9NWjYGaEUelGdLg3mUuuQF1Zl0k5vITH8,260
+ scraper2_hj3415/nfscrapy/nfs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/nfscrapy/nfs/items.py,sha256=zKTQWz-IrA_MLY86oVExFhbFq-bgBEHMFlmfZL_CwGA,1017
+ scraper2_hj3415/nfscrapy/nfs/middlewares.py,sha256=F3kL0OJMsUhiw2mPIxQLGvY3wdMxIsQl1BG2uwo_ZHo,3745
+ scraper2_hj3415/nfscrapy/nfs/pipelines.py,sha256=gMM_Jfm7dbAfaTux1YvfbJ34vF-BBoIq6QZqX9wN0Uc,989
+ scraper2_hj3415/nfscrapy/nfs/settings.py,sha256=UsmKxrDTNboO10CzeFi3IhNiTpiKg-gvNySSIlNiogQ,3426
+ scraper2_hj3415/nfscrapy/nfs/spiders/__init__.py,sha256=ULwecZkx3_NTphkz7y_qiazBeUoHFnCCWnKSjoDCZj0,161
+ scraper2_hj3415/nfscrapy/nfs/spiders/c101.py,sha256=WKqLQ5EzJTnH1uozyhO7j8Invv4kBy0JCFy4nemd4PI,4233
+ scraper2_hj3415-0.0.1.dist-info/METADATA,sha256=EQ_vUTFP3ayPQFr510jgVchhwNWQtkVu78sCGFuZShU,1082
+ scraper2_hj3415-0.0.1.dist-info/WHEEL,sha256=Fd6mP6ydyRguakwUJ05oBE7fh2IPxgtDN9IwHJ9OqJQ,87
+ scraper2_hj3415-0.0.1.dist-info/licenses/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
+ scraper2_hj3415-0.0.1.dist-info/RECORD,,
scraper2_hj3415-0.0.1.dist-info/WHEEL
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: hatchling 1.13.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any
scraper2_hj3415-0.0.1.dist-info/licenses/LICENSE
@@ -0,0 +1,19 @@
+ Copyright (c) 2018 The Python Packaging Authority
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.