coocan 0.4.5__tar.gz → 0.4.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
coocan-0.4.7/PKG-INFO ADDED
@@ -0,0 +1,90 @@
1
+ Metadata-Version: 2.1
2
+ Name: coocan
3
+ Version: 0.4.7
4
+ Summary: Air Spider Framework
5
+ Author: wauo
6
+ Author-email: markadc@126.com
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+
10
+ # 项目说明
11
+
12
+ - 一个非常轻量的异步爬虫框架
13
+
14
+ # 项目地址
15
+
16
+ - https://github.com/markadc/coocan
17
+
18
+ ## demo
19
+
20
+ - 效果
21
+ <br>
22
+ ![效果](coocan/demo.gif)
23
+
24
+
25
+ - 代码
26
+
27
+ ```python
28
+ import json
29
+
30
+ from loguru import logger
31
+
32
+ import coocan
33
+ from coocan import Request, MiniSpider
34
+
35
+
36
+ class CSDNDetailSpider(MiniSpider):
37
+ start_urls = ['http://www.csdn.net']
38
+ max_requests = 10
39
+
40
+ def middleware(self, request: Request):
41
+ request.headers["Referer"] = "http://www.csdn.net/"
42
+
43
+ def parse(self, response):
44
+ api = "https://blog.csdn.net/community/home-api/v1/get-business-list"
45
+ params = {
46
+ "page": "1",
47
+ "size": "20",
48
+ "businessType": "lately",
49
+ "noMore": "false",
50
+ "username": "markadc"
51
+ }
52
+ yield Request(api, self.parse_page, params=params, cb_kwargs={"api": api, "params": params})
53
+
54
+ def parse_page(self, response, api, params):
55
+ current_page = params["page"]
56
+ data = json.loads(response.text)
57
+ some = data["data"]["list"]
58
+
59
+ if not some:
60
+ logger.warning("没有第 {} 页".format(current_page))
61
+ return
62
+
63
+ for one in some:
64
+ date = one["formatTime"]
65
+ name = one["title"]
66
+ detail_url = one["url"]
67
+ logger.info(
68
+ """
69
+ {}
70
+ {}
71
+ {}
72
+ """.format(date, name, detail_url)
73
+ )
74
+ yield coocan.Request(detail_url, self.parse_detail, cb_kwargs={"title": name})
75
+
76
+ logger.info("第 {} 页抓取成功".format(params["page"]))
77
+
78
+ # 抓取下一页
79
+ next_page = int(current_page) + 1
80
+ params["page"] = str(next_page)
81
+ yield Request(api, self.parse_page, params=params, cb_kwargs={"api": api, "params": params})
82
+
83
+ def parse_detail(self, response, title):
84
+ logger.success("{} 已访问 {}".format(response.status_code, title))
85
+
86
+
87
+ if __name__ == '__main__':
88
+ s = CSDNDetailSpider()
89
+ s.go()
90
+ ```
coocan-0.4.7/README.md ADDED
@@ -0,0 +1,81 @@
1
+ # 项目说明
2
+
3
+ - 一个非常轻量的异步爬虫框架
4
+
5
+ # 项目地址
6
+
7
+ - https://github.com/markadc/coocan
8
+
9
+ ## demo
10
+
11
+ - 效果
12
+ <br>
13
+ ![效果](coocan/demo.gif)
14
+
15
+
16
+ - 代码
17
+
18
+ ```python
19
+ import json
20
+
21
+ from loguru import logger
22
+
23
+ import coocan
24
+ from coocan import Request, MiniSpider
25
+
26
+
27
+ class CSDNDetailSpider(MiniSpider):
28
+ start_urls = ['http://www.csdn.net']
29
+ max_requests = 10
30
+
31
+ def middleware(self, request: Request):
32
+ request.headers["Referer"] = "http://www.csdn.net/"
33
+
34
+ def parse(self, response):
35
+ api = "https://blog.csdn.net/community/home-api/v1/get-business-list"
36
+ params = {
37
+ "page": "1",
38
+ "size": "20",
39
+ "businessType": "lately",
40
+ "noMore": "false",
41
+ "username": "markadc"
42
+ }
43
+ yield Request(api, self.parse_page, params=params, cb_kwargs={"api": api, "params": params})
44
+
45
+ def parse_page(self, response, api, params):
46
+ current_page = params["page"]
47
+ data = json.loads(response.text)
48
+ some = data["data"]["list"]
49
+
50
+ if not some:
51
+ logger.warning("没有第 {} 页".format(current_page))
52
+ return
53
+
54
+ for one in some:
55
+ date = one["formatTime"]
56
+ name = one["title"]
57
+ detail_url = one["url"]
58
+ logger.info(
59
+ """
60
+ {}
61
+ {}
62
+ {}
63
+ """.format(date, name, detail_url)
64
+ )
65
+ yield coocan.Request(detail_url, self.parse_detail, cb_kwargs={"title": name})
66
+
67
+ logger.info("第 {} 页抓取成功".format(params["page"]))
68
+
69
+ # 抓取下一页
70
+ next_page = int(current_page) + 1
71
+ params["page"] = str(next_page)
72
+ yield Request(api, self.parse_page, params=params, cb_kwargs={"api": api, "params": params})
73
+
74
+ def parse_detail(self, response, title):
75
+ logger.success("{} 已访问 {}".format(response.status_code, title))
76
+
77
+
78
+ if __name__ == '__main__':
79
+ s = CSDNDetailSpider()
80
+ s.go()
81
+ ```
@@ -0,0 +1,66 @@
1
+ import re
2
+ from pathlib import Path
3
+
4
+ import click
5
+
6
+ TEMPLATE_DIR = Path(__file__).parent / "templates"
7
+
8
+ help_info = """
9
+ ██████╗ ██████╗ ██████╗ ██████╗ █████╗ ███╗ ██╗
10
+ ██╔════╝██╔═══██╗██╔═══██╗██╔════╝██╔══██╗████╗ ██║
11
+ ██║ ██║ ██║██║ ██║██║ ███████║██╔██╗ ██║
12
+ ██║ ██║ ██║██║ ██║██║ ██╔══██║██║╚██╗██║
13
+ ╚██████╗╚██████╔╝╚██████╔╝╚██████╗██║ ██║██║ ╚████║
14
+ ╚═════╝ ╚═════╝ ╚═════╝ ╚═════╝╚═╝ ╚═╝╚═╝ ╚═══╝
15
+ """
16
+
17
+
18
def show_help_info():
    """Print the module-level ``help_info`` banner to standard output."""
    banner = help_info
    print(banner)
20
+
21
+
22
def snake_to_pascal(snake_str: str) -> str:
    """Convert a snake_case name to PascalCase.

    The input is split on underscores and the first character of every word
    is upper-cased.  The rest of each word is left exactly as given, so
    capitals that are already present survive (``"DemoSpider"`` stays
    ``"DemoSpider"``).  Empty words produced by leading, trailing or doubled
    underscores contribute nothing to the result.

    Args:
        snake_str: The name to convert, e.g. ``"csdn_detail"``.

    Returns:
        The PascalCase form, e.g. ``"CsdnDetail"``; ``""`` for empty input.
    """
    # str.capitalize() also lower-cases the tail of each word, which turned
    # "DemoSpider" into "Demospider" and hid an existing "Spider" suffix from
    # callers that test for it.  Only the first character is touched here.
    return "".join(word[:1].upper() + word[1:] for word in snake_str.split("_"))
27
+
28
+
29
# Root command group.  It is documented with comments rather than a docstring
# on purpose: click would publish a docstring as the group's --help text,
# which the original group does not have.
@click.group(invoke_without_command=True)  # the callback also runs without a subcommand
@click.pass_context
def main(ctx):
    # A subcommand was requested: nothing to do here, click dispatches to it.
    if ctx.invoked_subcommand is not None:
        return

    # Bare invocation: show the banner followed by a one-line usage hint.
    show_help_info()
    click.echo("cc new -s <spider_file_name>")
35
+
36
+
37
# The command's help text is passed explicitly so that the developer-facing
# docstring below does not change what `--help` prints.
@main.command(help="新建")
@click.option('-s', '--spider', required=True, help='爬虫文件名字')
def new(spider):
    """Create ``<spider>.py`` in the current directory from the spider template.

    The template ``spider.txt`` is read from ``TEMPLATE_DIR`` and every
    ``{SpiderClassName}`` placeholder is replaced by the PascalCase form of
    ``spider``, with a ``Spider`` suffix appended when it is not already there.

    Args:
        spider: File name (without ``.py``) given via ``-s/--spider``.  It
            must be non-empty, consist of ASCII letters, digits and
            underscores only, and must not start with a digit.

    Raises:
        click.ClickException: If the template cannot be read or the output
            file cannot be written; the underlying error is echoed first and
            kept as the exception's cause.
    """
    # fullmatch with "+" rejects the empty string and a trailing newline,
    # both of which the previous "^...*$" search let through.
    if not re.fullmatch(r"[a-zA-Z0-9_]+", spider):
        click.echo("只支持字母、数字、下划线")
        return
    # The name becomes the start of a class name in the generated file, so a
    # leading digit would produce a file that does not parse.
    if spider[0].isdigit():
        click.echo("不能以数字开头")
        return

    class_name = snake_to_pascal(spider)
    if not class_name.endswith("Spider"):
        class_name += "Spider"

    try:
        # Explicit UTF-8 keeps reading and writing independent of the
        # platform's locale encoding.
        template = (TEMPLATE_DIR / "spider.txt").read_text(encoding="utf-8")
        # NOTE: an existing file with the same name is overwritten.
        Path("{}.py".format(spider)).write_text(
            template.replace("{SpiderClassName}", class_name),
            encoding="utf-8",
        )
    except Exception as e:
        click.echo(str(e))
        raise click.ClickException("Failed") from e

    click.echo("Success")
63
+
64
+
65
+ if __name__ == '__main__':
66
+ main()
@@ -1,7 +1,7 @@
1
1
  from coocan import Request, Response, MiniSpider
2
2
 
3
3
 
4
- class Spider(MiniSpider):
4
+ class {SpiderClassName}(MiniSpider):
5
5
  start_urls = ['https://github.com/markadc/coocan']
6
6
  max_requests = 10
7
7
 
@@ -10,3 +10,8 @@ class Spider(MiniSpider):
10
10
 
11
11
  def parse(self, response: Response):
12
12
  pass
13
+
14
+
15
+ if __name__ == '__main__':
16
+ s = {SpiderClassName}()
17
+ s.go()
@@ -0,0 +1,90 @@
1
+ Metadata-Version: 2.1
2
+ Name: coocan
3
+ Version: 0.4.7
4
+ Summary: Air Spider Framework
5
+ Author: wauo
6
+ Author-email: markadc@126.com
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+
10
+ # 项目说明
11
+
12
+ - 一个非常轻量的异步爬虫框架
13
+
14
+ # 项目地址
15
+
16
+ - https://github.com/markadc/coocan
17
+
18
+ ## demo
19
+
20
+ - 效果
21
+ <br>
22
+ ![效果](coocan/demo.gif)
23
+
24
+
25
+ - 代码
26
+
27
+ ```python
28
+ import json
29
+
30
+ from loguru import logger
31
+
32
+ import coocan
33
+ from coocan import Request, MiniSpider
34
+
35
+
36
+ class CSDNDetailSpider(MiniSpider):
37
+ start_urls = ['http://www.csdn.net']
38
+ max_requests = 10
39
+
40
+ def middleware(self, request: Request):
41
+ request.headers["Referer"] = "http://www.csdn.net/"
42
+
43
+ def parse(self, response):
44
+ api = "https://blog.csdn.net/community/home-api/v1/get-business-list"
45
+ params = {
46
+ "page": "1",
47
+ "size": "20",
48
+ "businessType": "lately",
49
+ "noMore": "false",
50
+ "username": "markadc"
51
+ }
52
+ yield Request(api, self.parse_page, params=params, cb_kwargs={"api": api, "params": params})
53
+
54
+ def parse_page(self, response, api, params):
55
+ current_page = params["page"]
56
+ data = json.loads(response.text)
57
+ some = data["data"]["list"]
58
+
59
+ if not some:
60
+ logger.warning("没有第 {} 页".format(current_page))
61
+ return
62
+
63
+ for one in some:
64
+ date = one["formatTime"]
65
+ name = one["title"]
66
+ detail_url = one["url"]
67
+ logger.info(
68
+ """
69
+ {}
70
+ {}
71
+ {}
72
+ """.format(date, name, detail_url)
73
+ )
74
+ yield coocan.Request(detail_url, self.parse_detail, cb_kwargs={"title": name})
75
+
76
+ logger.info("第 {} 页抓取成功".format(params["page"]))
77
+
78
+ # 抓取下一页
79
+ next_page = int(current_page) + 1
80
+ params["page"] = str(next_page)
81
+ yield Request(api, self.parse_page, params=params, cb_kwargs={"api": api, "params": params})
82
+
83
+ def parse_detail(self, response, title):
84
+ logger.success("{} 已访问 {}".format(response.status_code, title))
85
+
86
+
87
+ if __name__ == '__main__':
88
+ s = CSDNDetailSpider()
89
+ s.go()
90
+ ```
@@ -1,3 +1,4 @@
1
+ README.md
1
2
  setup.py
2
3
  coocan/__init__.py
3
4
  coocan/gen.py
@@ -9,8 +10,7 @@ coocan.egg-info/requires.txt
9
10
  coocan.egg-info/top_level.txt
10
11
  coocan/cmd/__init__.py
11
12
  coocan/cmd/cli.py
12
- coocan/cmd/templates/__init__.py
13
- coocan/cmd/templates/spider.py
13
+ coocan/cmd/templates/spider.txt
14
14
  coocan/spider/__init__.py
15
15
  coocan/spider/base.py
16
16
  coocan/url/__init__.py
@@ -1,13 +1,20 @@
1
1
  from setuptools import setup, find_packages
2
2
 
3
+ with open("README.md", "r", encoding="utf-8") as f:
4
+ long_description = f.read()
5
+
3
6
  setup(
4
7
  name="coocan",
5
- version="0.4.5",
8
+ version="0.4.7",
6
9
  author="wauo",
7
10
  author_email="markadc@126.com",
8
11
  description="Air Spider Framework",
9
12
  packages=find_packages(),
10
13
  python_requires=">=3.10",
14
+
15
+ long_description=long_description,
16
+ long_description_content_type="text/markdown",
17
+
11
18
  install_requires=[
12
19
  'click>=8.0.0', 'httpx', 'loguru'
13
20
  ],
@@ -15,5 +22,9 @@ setup(
15
22
  'console_scripts': [
16
23
  'cc=coocan.cmd.cli:main',
17
24
  ],
18
- }
25
+ },
26
+ package_data={
27
+ 'coocan.cmd': ['templates/*'],
28
+ },
29
+ include_package_data=True
19
30
  )
coocan-0.4.5/PKG-INFO DELETED
@@ -1,7 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: coocan
3
- Version: 0.4.5
4
- Summary: Air Spider Framework
5
- Author: wauo
6
- Author-email: markadc@126.com
7
- Requires-Python: >=3.10
@@ -1,41 +0,0 @@
1
- from pathlib import Path
2
-
3
- import click
4
-
5
- TEMPLATE_DIR = Path(__file__).parent / "templates"
6
-
7
-
8
- @click.group()
9
- def main():
10
- """
11
- \n
12
- 可用命令:
13
- new - 创建新的爬虫文件
14
- \n
15
- 示例:
16
- cc new -s demo
17
- """
18
-
19
-
20
- @main.command()
21
- @click.option('-s', '--spider', required=True, help='爬虫文件')
22
- def new(spider):
23
- """新建"""
24
- spider_file_name = "{}.py".format(spider)
25
- try:
26
- template_path = TEMPLATE_DIR / "spider.py"
27
- with open(template_path, 'r') as f:
28
- content = f.read()
29
-
30
- with open(spider_file_name, 'w') as f:
31
- f.write(content)
32
-
33
- click.echo("Success")
34
-
35
- except Exception as e:
36
- click.echo(str(e))
37
- raise click.ClickException("Failed")
38
-
39
-
40
- if __name__ == '__main__':
41
- main()
File without changes
@@ -1,7 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: coocan
3
- Version: 0.4.5
4
- Summary: Air Spider Framework
5
- Author: wauo
6
- Author-email: markadc@126.com
7
- Requires-Python: >=3.10
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes