crawlo 1.0.8__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of crawlo might be problematic.
- crawlo/__version__.py +1 -1
- crawlo/commands/__init__.py +6 -2
- crawlo/commands/check.py +80 -31
- crawlo/commands/list.py +67 -40
- crawlo/commands/run.py +92 -102
- crawlo/commands/stats.py +139 -31
- crawlo/core/engine.py +1 -1
- {crawlo-1.0.8.dist-info → crawlo-1.1.0.dist-info}/METADATA +1 -1
- {crawlo-1.0.8.dist-info → crawlo-1.1.0.dist-info}/RECORD +13 -13
- examples/gxb/spider/__init__.py +2 -0
- {crawlo-1.0.8.dist-info → crawlo-1.1.0.dist-info}/WHEEL +0 -0
- {crawlo-1.0.8.dist-info → crawlo-1.1.0.dist-info}/entry_points.txt +0 -0
- {crawlo-1.0.8.dist-info → crawlo-1.1.0.dist-info}/top_level.txt +0 -0
crawlo/__version__.py
CHANGED
@@ -1 +1 @@
-__version__ = "1.0.8"
+__version__ = "1.1.0"
crawlo/commands/__init__.py
CHANGED
@@ -1,9 +1,13 @@
-
-#
+#!/usr/bin/python
+# -*- coding:UTF-8 -*-
+
 _commands = {
     'startproject': 'crawlo.commands.startproject',
     'genspider': 'crawlo.commands.genspider',
     'run': 'crawlo.commands.run',
+    'check': 'crawlo.commands.check',
+    'list': 'crawlo.commands.list',
+    'stats': 'crawlo.commands.stats'
 }
 
 def get_commands():
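The three new entries register the check, list and stats commands by module path next to the existing ones. A minimal sketch of how a dispatcher can resolve such a mapping (the real wiring lives in crawlo/cli.py and get_commands(), which are not shown in this diff; dispatch() below is a hypothetical helper):

from importlib import import_module

_commands = {
    'startproject': 'crawlo.commands.startproject',
    'genspider': 'crawlo.commands.genspider',
    'run': 'crawlo.commands.run',
    'check': 'crawlo.commands.check',
    'list': 'crawlo.commands.list',
    'stats': 'crawlo.commands.stats',
}


def dispatch(name, args):
    # Resolve the command name to its module and call its entry point;
    # the command modules touched in this release (check, list, run, stats)
    # each expose a main(args) function returning an exit code.
    module = import_module(_commands[name])
    return module.main(args)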
crawlo/commands/check.py
CHANGED
@@ -1,85 +1,130 @@
 #!/usr/bin/python
-# -*- coding:UTF-8 -*-
+# -*- coding: UTF-8 -*-
 """
-# @Time :
-# @Author :
-# @Desc :
+# @Time : 2025-08-31 22:35
+# @Author : crawl-coder
+# @Desc : 命令行入口:crawlo check,检查所有爬虫定义是否合规。
 """
+
 import sys
 import configparser
+from pathlib import Path
+from importlib import import_module
 
 from crawlo.crawler import CrawlerProcess
-from crawlo.utils.project import get_settings
 from crawlo.utils.log import get_logger
 
 
 logger = get_logger(__name__)
 
 
+def get_project_root():
+    """
+    从当前目录向上查找 crawlo.cfg,确定项目根目录
+    """
+    current = Path.cwd()
+
+    for _ in range(10):
+        cfg = current / "crawlo.cfg"
+        if cfg.exists():
+            return current
+
+        if current == current.parent:
+            break
+        current = current.parent
+
+    return None
+
+
 def main(args):
+    """
+    主函数:检查所有爬虫定义的合规性
+    用法: crawlo check
+    """
     if args:
-        print("Usage: crawlo check")
+        print("❌ Usage: crawlo check")
         return 1
 
     try:
-
+        # 1. 查找项目根目录
+        project_root = get_project_root()
         if not project_root:
-            print("❌ Error: Cannot
+            print("❌ Error: Cannot find 'crawlo.cfg'. Are you in a crawlo project?")
+            print("💡 Tip: Run this command inside your project directory.")
             return 1
 
-
-
+        project_root_str = str(project_root)
+        if project_root_str not in sys.path:
+            sys.path.insert(0, project_root_str)
 
-
+        # 2. 读取 crawlo.cfg
+        cfg_file = project_root / "crawlo.cfg"
         if not cfg_file.exists():
-            print(f"❌ Error:
+            print(f"❌ Error: Expected config file not found: {cfg_file}")
             return 1
 
         config = configparser.ConfigParser()
-        config.read(cfg_file, encoding=
+        config.read(cfg_file, encoding="utf-8")
 
-        if not config.has_section(
+        if not config.has_section("settings") or not config.has_option("settings", "default"):
             print("❌ Error: Missing [settings] section or 'default' option in crawlo.cfg")
             return 1
 
-        settings_module = config.get(
-        project_package = settings_module.split(
+        settings_module = config.get("settings", "default")
+        project_package = settings_module.split(".")[0]
+
+        # 3. 确保项目包可导入
+        try:
+            import_module(project_package)
+        except ImportError as e:
+            print(f"❌ Failed to import project package '{project_package}': {e}")
+            return 1
 
-        #
-
+        # 4. 加载爬虫
+        spider_modules = [f"{project_package}.spiders"]
+        process = CrawlerProcess(spider_modules=spider_modules)
         spider_names = process.get_spider_names()
 
         if not spider_names:
             print("📭 No spiders found.")
+            print("💡 Make sure:")
+            print(" • Spiders are defined in the 'spiders' module")
+            print(" • They have a `name` attribute")
+            print(" • Modules are properly imported")
             return 1
 
         print(f"🔍 Checking {len(spider_names)} spider(s)...")
         print("-" * 60)
 
        issues_found = False
+
         for name in sorted(spider_names):
             cls = process.get_spider_class(name)
             issues = []
 
-
+            # 检查 name 属性
+            if not getattr(cls, "name", None):
                 issues.append("missing or empty 'name' attribute")
             elif not isinstance(cls.name, str):
                 issues.append("'name' is not a string")
 
-
+            # 检查 start_requests 是否可调用
+            if not callable(getattr(cls, "start_requests", None)):
                 issues.append("missing or non-callable 'start_requests' method")
 
-
-
+            # 检查 start_urls 类型(不应是字符串)
+            if hasattr(cls, "start_urls") and isinstance(cls.start_urls, str):
+                issues.append("'start_urls' is a string; should be list or tuple")
 
-            #
+            # 实例化并检查 parse 方法(非强制但推荐)
            try:
                 spider = cls.create_instance(None)
-                if not callable(getattr(spider,
-                    issues.append("no 'parse' method defined (
+                if not callable(getattr(spider, "parse", None)):
+                    issues.append("no 'parse' method defined (recommended)")
             except Exception as e:
-                issues.append(f"failed to
+                issues.append(f"failed to instantiate spider: {e}")
 
+            # 输出结果
             if issues:
                 print(f"❌ {name:<20} {cls.__name__}")
                 for issue in issues:
@@ -89,19 +134,23 @@ def main(args):
                 print(f"✅ {name:<20} {cls.__name__} (OK)")
 
         print("-" * 60)
+
         if issues_found:
             print("⚠️ Some spiders have issues. Please fix them.")
             return 1
         else:
-            print("🎉 All spiders are compliant!")
+            print("🎉 All spiders are compliant and well-defined!")
         return 0
 
     except Exception as e:
-        print(f"❌
-
-        traceback.print_exc()
+        print(f"❌ Unexpected error during check: {e}")
+        logger.exception("Exception in 'crawlo check'")
         return 1
 
 
-if __name__ ==
+if __name__ == "__main__":
+    """
+    支持直接运行:
+    python -m crawlo.commands.check
+    """
     sys.exit(main(sys.argv[1:]))
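The checker instantiates each spider class via cls.create_instance(None) and flags a missing or non-string name, a non-callable start_requests, a string-valued start_urls, and (as a recommendation only) a missing parse method. A minimal sketch of a spider that would pass crawlo check, assuming the crawlo.spider.Spider base class referenced by the hints printed in crawlo list; the class and its body are illustrative, not part of this release:

from crawlo.spider import Spider


class ExampleSpider(Spider):
    name = "example"                       # non-empty string -> passes the name check
    start_urls = ["https://example.com"]   # list/tuple, not a bare string

    def start_requests(self):
        # The checker only verifies that this is callable; the Request API
        # itself is not part of this diff, so nothing concrete is issued here.
        return iter(())

    def parse(self, response):
        # Optional, but its absence is reported as
        # "no 'parse' method defined (recommended)".
        yield {"url": getattr(response, "url", None)}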
crawlo/commands/list.py
CHANGED
@@ -1,92 +1,119 @@
 #!/usr/bin/python
-# -*- coding:UTF-8 -*-
+# -*- coding: UTF-8 -*-
 """
-# @Time :
-# @Author :
-# @Desc :
+# @Time : 2025-08-31 22:33
+# @Author : crawl-coder
+# @Desc : 命令行入口:crawlo list,用于列出所有已注册的爬虫
 """
+
 import sys
 import configparser
+from pathlib import Path
+from importlib import import_module
 
 from crawlo.crawler import CrawlerProcess
-from crawlo.utils.project import get_settings
 from crawlo.utils.log import get_logger
 
 
 logger = get_logger(__name__)
 
 
+def get_project_root():
+    """
+    自动检测项目根目录:从当前目录向上查找 crawlo.cfg
+    找到后返回该目录路径(字符串),最多向上查找10层。
+    """
+    current = Path.cwd()
+
+    for _ in range(10):
+        cfg = current / "crawlo.cfg"
+        if cfg.exists():
+            return str(current)
+
+        # 到达文件系统根目录
+        if current == current.parent:
+            break
+        current = current.parent
+
+    return None  # 未找到
+
+
 def main(args):
     """
-
+    主函数:列出所有可用爬虫
     用法: crawlo list
     """
     if args:
-        print("Usage: crawlo list")
+        print("❌ Usage: crawlo list")
         return 1
 
     try:
-        # 1.
-        project_root =
+        # 1. 查找项目根目录
+        project_root = get_project_root()
         if not project_root:
-            print("❌ Error: Cannot
+            print("❌ Error: Cannot find 'crawlo.cfg'. Are you in a crawlo project?")
+            print("💡 Tip: Run this command inside your project directory, or create a project with 'crawlo startproject'.")
             return 1
 
-
-        project_root_str = str(
+        project_root_path = Path(project_root)
+        project_root_str = str(project_root_path)
+
+        # 2. 将项目根加入 Python 路径,以便导入项目模块
         if project_root_str not in sys.path:
             sys.path.insert(0, project_root_str)
 
-        #
-        cfg_file =
-        if not cfg_file.exists():
-            print(f"❌ Error: crawlo.cfg not found in {project_root}")
-            return 1
-
+        # 3. 读取 crawlo.cfg 获取 settings 模块
+        cfg_file = project_root_path / "crawlo.cfg"
         config = configparser.ConfigParser()
-        config.read(cfg_file, encoding=
+        config.read(cfg_file, encoding="utf-8")
 
-        if not config.has_section(
-            print("❌ Error:
+        if not config.has_section("settings") or not config.has_option("settings", "default"):
+            print("❌ Error: Invalid crawlo.cfg — missing [settings] or 'default' option.")
             return 1
 
-        settings_module = config.get(
-        project_package = settings_module.split(
+        settings_module = config.get("settings", "default")
+        project_package = settings_module.split(".")[0]
+
+        # 4. 确保项目包可导入(可选:尝试导入以触发异常)
+        try:
+            import_module(project_package)
+        except ImportError as e:
+            print(f"❌ Failed to import project package '{project_package}': {e}")
+            return 1
 
-        #
+        # 5. 初始化 CrawlerProcess 并加载爬虫模块
         spider_modules = [f"{project_package}.spiders"]
         process = CrawlerProcess(spider_modules=spider_modules)
 
-        #
+        # 6. 获取所有爬虫名称
         spider_names = process.get_spider_names()
         if not spider_names:
-            print("📭 No spiders found.")
+            print("📭 No spiders found in 'spiders/' directory.")
             print("💡 Make sure:")
-            print("
-            print("
-            print("
+            print(" • Spider classes inherit from `crawlo.spider.Spider`")
+            print(" • Each spider has a `name` attribute")
+            print(" • Spiders are imported in `spiders/__init__.py` (if using package)")
             return 1
 
-        #
+        # 7. 输出爬虫列表
         print(f"📋 Found {len(spider_names)} spider(s):")
-        print("-" *
+        print("-" * 60)
         for name in sorted(spider_names):
-
-
-            print(f"🕷️ {name:<20} {
-        print("-" *
+            spider_cls = process.get_spider_class(name)
+            module_name = spider_cls.__module__.replace(f"{project_package}.", "")
+            print(f"🕷️ {name:<20} {spider_cls.__name__:<25} ({module_name})")
+        print("-" * 60)
         return 0
 
     except Exception as e:
-        print(f"❌
-
-        traceback.print_exc()
+        print(f"❌ Unexpected error: {e}")
+        logger.exception("Exception during 'crawlo list'")
         return 1
 
 
-if __name__ ==
+if __name__ == "__main__":
     """
-
+    支持直接运行:
     python -m crawlo.commands.list
     """
     sys.exit(main(sys.argv[1:]))
crawlo/commands/run.py
CHANGED
@@ -1,106 +1,153 @@
+#!/usr/bin/python
+# -*- coding: UTF-8 -*-
 """
-
-
+# @Time : 2025-08-31 22:36
+# @Author : crawl-coder
+# @Desc : 命令行入口:crawlo run <spider_name>|all,用于运行指定爬虫。
 """
-
+import sys
 import asyncio
-from pathlib import Path
 import configparser
+from pathlib import Path
+from importlib import import_module
 
 from crawlo.crawler import CrawlerProcess
-from crawlo.utils.project import get_settings
 from crawlo.utils.log import get_logger
+from crawlo.utils.project import get_settings
+from crawlo.commands.stats import record_stats  # 自动记录 stats
 
 logger = get_logger(__name__)
 
 
+def get_project_root():
+    """
+    向上查找 crawlo.cfg 来确定项目根目录
+    """
+    current = Path.cwd()
+
+    for _ in range(10):
+        cfg = current / "crawlo.cfg"
+        if cfg.exists():
+            return current
+
+        if current == current.parent:
+            break
+        current = current.parent
+
+    return None
+
+
 def main(args):
     """
-
+    主函数:运行指定爬虫
     用法:
         crawlo run <spider_name>
         crawlo run all
     """
     if len(args) < 1:
-        print("Usage: crawlo run <spider_name>|all")
-        print("Examples:")
-        print("
-        print("
+        print("❌ Usage: crawlo run <spider_name>|all")
+        print("💡 Examples:")
+        print("   crawlo run baidu")
+        print("   crawlo run all")
         return 1
 
     spider_arg = args[0]
 
     try:
-        # 1.
-        project_root =
+        # 1. 查找项目根目录
+        project_root = get_project_root()
         if not project_root:
-            print("❌ Error: Cannot
+            print("❌ Error: Cannot find 'crawlo.cfg'. Are you in a crawlo project?")
+            print("💡 Tip: Run this command inside your project directory.")
             return 1
 
-
-
+        project_root_str = str(project_root)
+        if project_root_str not in sys.path:
+            sys.path.insert(0, project_root_str)
 
-        # 2. 读取 crawlo.cfg
-        cfg_file = project_root /
+        # 2. 读取 crawlo.cfg 获取 settings 模块
+        cfg_file = project_root / "crawlo.cfg"
         if not cfg_file.exists():
             print(f"❌ Error: crawlo.cfg not found in {project_root}")
             return 1
 
         config = configparser.ConfigParser()
-        config.read(cfg_file, encoding=
+        config.read(cfg_file, encoding="utf-8")
 
-        if not config.has_section(
+        if not config.has_section("settings") or not config.has_option("settings", "default"):
             print("❌ Error: Missing [settings] section or 'default' option in crawlo.cfg")
             return 1
 
-        settings_module = config.get(
-        project_package = settings_module.split(
+        settings_module = config.get("settings", "default")
+        project_package = settings_module.split(".")[0]
 
-        # 3.
+        # 3. 确保项目包可导入
+        try:
+            import_module(project_package)
+        except ImportError as e:
+            print(f"❌ Failed to import project package '{project_package}': {e}")
+            return 1
+
+        # 4. 加载 settings 和爬虫模块
+        settings = get_settings()  # 此时已安全
         spider_modules = [f"{project_package}.spiders"]
-        settings = get_settings()
         process = CrawlerProcess(settings=settings, spider_modules=spider_modules)
 
-        # ===
+        # === 情况1:运行所有爬虫 ===
        if spider_arg.lower() == "all":
             spider_names = process.get_spider_names()
             if not spider_names:
-                print("❌ No spiders found.
+                print("❌ No spiders found.")
+                print("💡 Make sure:")
+                print(" • Spiders are defined in 'spiders/'")
+                print(" • They have a `name` attribute")
+                print(" • Modules are imported (e.g. via __init__.py)")
                 return 1
 
-            print(f"🚀 Starting ALL {len(spider_names)}
+            print(f"🚀 Starting ALL {len(spider_names)} spider(s):")
+            print("-" * 60)
             for name in sorted(spider_names):
                 cls = process.get_spider_class(name)
-                print(f"
-            print("-" *
+                print(f"🕷️ {name:<20} {cls.__name__}")
+            print("-" * 60)
+
+            # 注册 stats 记录(每个爬虫结束时保存)
+            for crawler in process.crawlers:
+                crawler.signals.connect(record_stats, signal="spider_closed")
 
-            #
+            # 并行运行所有爬虫(可改为串行:for name in ... await process.crawl(name))
             asyncio.run(process.crawl(spider_names))
+            print("✅ All spiders completed.")
             return 0
 
-        # ===
+        # === 情况2:运行单个爬虫 ===
         spider_name = spider_arg
         if not process.is_spider_registered(spider_name):
-            print(f"❌
-
-            if
+            print(f"❌ Spider '{spider_name}' not found.")
+            available = process.get_spider_names()
+            if available:
                 print("💡 Available spiders:")
-                for name in sorted(
+                for name in sorted(available):
                     cls = process.get_spider_class(name)
-                    print(f"
+                    print(f" • {name} ({cls.__name__})")
             else:
-                print("💡 No spiders found.
+                print("💡 No spiders found. Check your spiders module.")
             return 1
 
         spider_class = process.get_spider_class(spider_name)
 
         # 打印启动信息
         print(f"🚀 Starting spider: {spider_name}")
-        print(f"
-        print(f"
+        print(f"📦 Project: {project_package}")
+        print(f"Class: {spider_class.__name__}")
+        print(f"📄 Module: {spider_class.__module__}")
         print("-" * 50)
 
-        #
+        # 注册 stats 记录
+        for crawler in process.crawlers:
+            crawler.signals.connect(record_stats, signal="spider_closed")
+
+        # 运行爬虫
         asyncio.run(process.crawl(spider_name))
 
         print("-" * 50)
@@ -111,71 +158,14 @@ def main(args):
         print("\n⚠️ Spider interrupted by user.")
         return 1
     except Exception as e:
-        print(f"❌
-
-        traceback.print_exc()
+        print(f"❌ Unexpected error: {e}")
+        logger.exception("Exception during 'crawlo run'")
         return 1
 
 
-
-    """
-    列出指定项目包中所有可用的爬虫(用于调试或命令行扩展)
-    """
-    try:
-        # 临时创建一个 CrawlerProcess 来发现爬虫
-        process = CrawlerProcess(spider_modules=[f"{project_package}.spiders"])
-        available_names = process.get_spider_names()
-
-        if not available_names:
-            print(" No spiders found. Make sure:")
-            print(" - spiders/ 目录存在")
-            print(" - 爬虫类继承 Spider 且定义了 name")
-            print(" - 模块被导入(可通过 __init__.py 触发)")
-            return
-
-        print(f"Found {len(available_names)} spider(s):")
-        for name in sorted(available_names):
-            cls = process.get_spider_class(name)
-            module = cls.__module__.replace(project_package + ".", "")
-            print(f" - {name} ({cls.__name__} @ {module})")
-    except Exception as e:
-        print(f"❌ Failed to list spiders: {e}")
-        import traceback
-        traceback.print_exc()
-
-
-def run_spider_by_name(spider_name: str, project_package: str = None):
+if __name__ == "__main__":
     """
-
+    支持直接运行:
+    python -m crawlo.commands.run spider_name
     """
-
-    # 尝试从配置读取
-    cfg_file = Path('crawlo.cfg')
-    if cfg_file.exists():
-        config = configparser.ConfigParser()
-        config.read(cfg_file, encoding='utf-8')
-        if config.has_option('settings', 'default'):
-            project_package = config.get('settings', 'default').split('.')[0]
-
-    if not project_package:
-        print("❌ Error: project_package is required.")
-        return 1
-
-    # 添加项目路径
-    project_root = get_settings().get('PROJECT_ROOT')
-    if project_root and str(project_root) not in sys.path:
-        sys.path.insert(0, str(project_root))
-
-    # 复用 main 函数逻辑
-    args = [spider_name]
-    return main(args)
-
-
-if __name__ == '__main__':
-    """
-    允许直接运行:
-    python -m crawlo.commands.run <spider_name>
-    """
-    import sys
-
-    sys.exit(main(sys.argv[1:]))
+    sys.exit(main(sys.argv[1:]))
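With the removal of run_spider_by_name, the module now exposes a single main(args) entry point, so it can be driven from the CLI or programmatically. A small sketch (the spider name "baidu" is the one used in the command's own usage hints and is illustrative):

from crawlo.commands.run import main

# Equivalent to `crawlo run baidu` / `python -m crawlo.commands.run baidu`;
# pass ["all"] to run every registered spider. Stats for each finished spider
# are written by the record_stats hook that the command connects above.
exit_code = main(["baidu"])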
crawlo/commands/stats.py
CHANGED
@@ -1,59 +1,167 @@
 #!/usr/bin/python
-# -*- coding:UTF-8 -*-
+# -*- coding: UTF-8 -*-
 """
-# @Time :
-# @Author :
-# @Desc :
+# @Time : 2025-08-31 22:36
+# @Author : crawl-coder
+# @Desc : 命令行入口:crawlo stats,查看最近运行的爬虫统计信息。
 """
+
 import sys
+import json
+from pathlib import Path
+from datetime import datetime
+from typing import Dict, Any
+
 from crawlo.utils.log import get_logger
 
 
 logger = get_logger(__name__)
 
-#
-
+# 默认存储目录(相对于项目根目录)
+STATS_DIR = "logs/stats"
+
+
+def get_stats_dir() -> Path:
+    """
+    获取统计文件存储目录,优先使用项目根下的 logs/stats/
+    如果不在项目中,回退到当前目录
+    """
+    # 尝试查找项目根目录(通过 crawlo.cfg)
+    current = Path.cwd()
+    for _ in range(10):
+        if (current / "crawlo.cfg").exists():
+            return current / STATS_DIR
+        if current == current.parent:
+            break
+        current = current.parent
+
+    # 回退:使用当前目录下的 logs/stats
+    return Path.cwd() / STATS_DIR
 
 
 def record_stats(crawler):
-    """
-
-
+    """
+    【供爬虫运行时调用】记录爬虫结束后的统计信息到 JSON 文件
+    需在 Crawler 的 closed 回调中调用
+    """
+    spider_name = getattr(crawler.spider, "name", "unknown")
+    stats = crawler.stats.get_stats() if crawler.stats else {}
+
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    stats_dir = Path(get_stats_dir())
+    stats_dir.mkdir(parents=True, exist_ok=True)
+
+    filename = stats_dir / f"{spider_name}_{timestamp}.json"
+    try:
+        with open(filename, "w", encoding="utf-8") as f:
+            json.dump({
+                "spider": spider_name,
+                "timestamp": datetime.now().isoformat(),
+                "stats": stats
+            }, f, ensure_ascii=False, indent=2, default=str)
+        logger.info(f"📊 Stats saved for spider '{spider_name}' → {filename}")
+    except Exception as e:
+        logger.error(f"Failed to save stats for '{spider_name}': {e}")
+
+
+def load_all_stats() -> Dict[str, list]:
+    """
+    加载所有已保存的统计文件,按 spider name 分组
+    返回: {spider_name: [stats_record, ...]}
+    """
+    stats_dir = get_stats_dir()
+    if not stats_dir.exists():
+        return {}
+
+    result = {}
+    json_files = sorted(stats_dir.glob("*.json"), key=lambda x: x.stat().st_mtime, reverse=True)
+
+    for file in json_files:
+        try:
+            with open(file, "r", encoding="utf-8") as f:
+                data = json.load(f)
+            spider_name = data.get("spider", "unknown")
+            result.setdefault(spider_name, []).append(data)
+        except Exception as e:
+            logger.warning(f"Failed to load stats file {file}: {e}")
+    return result
+
+
+def format_value(v: Any) -> str:
+    """格式化值,防止太长或不可打印"""
+    if isinstance(v, float):
+        return f"{v:.4f}"
+    return str(v)
 
 
 def main(args):
-
-
-
-
-
+    """
+    主函数:查看统计信息
+    用法:
+        crawlo stats → 显示所有爬虫最近一次运行
+        crawlo stats myspider → 显示指定爬虫所有历史记录
+        crawlo stats myspider --all → 显示所有历史(同上)
+    """
+    if len(args) > 2:
+        print("Usage: crawlo stats [spider_name] [--all]")
+        return 1
+
+    spider_name = None
+    show_all = False
+
+    if args:
+        spider_name = args[0]
+        show_all = "--all" in args or "-a" in args
+
+    # 加载所有 stats
+    all_stats = load_all_stats()
+    if not all_stats:
+        print("📊 No stats found. Run a spider first.")
+        print(f"💡 Stats are saved in: {get_stats_dir()}")
+        return 0
 
-
+    if not spider_name:
+        # 显示每个爬虫最近一次运行
+        print("📊 Recent Spider Statistics (last run):")
         print("-" * 60)
-        for
-
-
-
+        for name, runs in all_stats.items():
+            latest = runs[0]
+            print(f"🕷️ {name} ({latest['timestamp'][:19]})")
+            stats = latest["stats"]
+            for k in sorted(stats.keys()):
+                print(f" {k:<30} {format_value(stats[k])}")
             print()
         return 0
 
-
-
-        if spider_name not in
+    else:
+        # 查看指定爬虫
+        if spider_name not in all_stats:
             print(f"📊 No stats found for spider '{spider_name}'")
+            available = ', '.join(all_stats.keys())
+            if available:
+                print(f"💡 Available spiders: {available}")
             return 1
 
-
-
+        runs = all_stats[spider_name]
+        if show_all:
+            print(f"📊 All runs for '{spider_name}' ({len(runs)} runs):")
+        else:
+            runs = runs[:1]
+            print(f"📊 Last run for '{spider_name}':")
+
         print("-" * 60)
-        for
-            print(f"
+        for run in runs:
+            print(f"⏱️ Timestamp: {run['timestamp']}")
+            stats = run["stats"]
+            for k in sorted(stats.keys()):
+                print(f" {k:<30} {format_value(stats[k])}")
+            print("─" * 60)
         return 0
 
-    else:
-        print("Usage: crawlo stats [spider_name]")
-        return 1
-
 
-if __name__ ==
+if __name__ == "__main__":
+    """
+    支持直接运行:
+    python -m crawlo.commands.stats
+    """
     sys.exit(main(sys.argv[1:]))
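record_stats writes one JSON file per finished spider under logs/stats/ in the project root (falling back to ./logs/stats when no crawlo.cfg is found), named <spider>_<YYYYmmdd_HHMMSS>.json with the keys "spider", "timestamp" and "stats". Those files can also be consumed outside the CLI; a short sketch using the helpers added above (the stats keys themselves depend on the project's stats collector and are not shown in this diff):

from crawlo.commands.stats import load_all_stats

# {spider_name: [record, ...]}, newest record first, where each record is the
# dict written by record_stats: {"spider": ..., "timestamp": ..., "stats": {...}}.
all_stats = load_all_stats()
for name, runs in all_stats.items():
    print(name, runs[0]["timestamp"])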
crawlo/core/engine.py
CHANGED
@@ -42,7 +42,7 @@ class Engine(object):
     def engine_start(self):
         self.running = True
         self.logger.info(
-            f"Crawlo (version {self.settings.
+            f"Crawlo (version {self.settings.get_float('VERSION')}) started. "
             f"(project name : {self.settings.get('PROJECT_NAME')})"
         )
 
{crawlo-1.0.8.dist-info → crawlo-1.1.0.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
 crawlo/__init__.py,sha256=xpiIAZbSG3CzneJuDLPCbwfRcvw2wyHYl2kJjaNfNGY,584
-crawlo/__version__.py,sha256=
+crawlo/__version__.py,sha256=Zrv57EzpjdsuSPqsYvFkVsQKKRUOHFG7yURCf7qN-Tk,23
 crawlo/cli.py,sha256=hjAJKx9pba375sATvvcy-dtZyBIgXj8fRBq9RFIZHA4,1206
 crawlo/crawler.py,sha256=AyKxUyJvCwb1u4d3Zn3vFmjH28ExWKIygfTICps-3yY,20026
 crawlo/event.py,sha256=ZhoPW5CglCEuZNFEwviSCBIw0pT5O6jT98bqYrDFd3E,324
@@ -7,15 +7,15 @@ crawlo/exceptions.py,sha256=xdyZkvVcLEJ-19sWMHvn9IJsu30-hAY2jJhA2kYIims,1207
 crawlo/stats_collector.py,sha256=v4jC9BAe-23w93hWzbeMCCgQ9VuFPyxw5JV9ItbGH8w,1636
 crawlo/subscriber.py,sha256=udlHeTR0ymGQhCDxVUGwUzeeeR4TYCEJrJwFnkgr0cU,3836
 crawlo/task_manager.py,sha256=D9m-nqnGj-FZPtGk4CdwZX3Gw7IWyYvTS7CHpRGWc_w,748
-crawlo/commands/__init__.py,sha256=
-crawlo/commands/check.py,sha256=
+crawlo/commands/__init__.py,sha256=kZ3qATqDPmMUCNUQSFfBfIA8fp_1dgBwIAWbmFN3_To,355
+crawlo/commands/check.py,sha256=He5Dmpn8M0gYEfiXRW801I6ULypWKMvT5Iwjg_4cUYE,5070
 crawlo/commands/genspider.py,sha256=kSHYsAGHRoxU6Qf_MGpR_VS-Ua5NUGY2KGm_Wapn0sw,3529
-crawlo/commands/list.py,sha256=
-crawlo/commands/run.py,sha256=
+crawlo/commands/list.py,sha256=iwd1piFYa7cr4WkRTD0ndCZEN0xoZX0vvlWTU1FbSYE,3972
+crawlo/commands/run.py,sha256=ppgEUNVNuhpQFiBkgB6ZFAKeOJiLybd68gGHcAJgF4w,5813
 crawlo/commands/startproject.py,sha256=1KOq_CALy01oklr0dAUYhGFzu4f7w45q2H0O3qafLX4,3494
-crawlo/commands/stats.py,sha256=
+crawlo/commands/stats.py,sha256=siuCv2PGhr0_eqAaER2YYwI_IHmOlFbgIHWmX3-EWs4,5246
 crawlo/core/__init__.py,sha256=JYSAn15r8yWgRK_Nc69t_8tZCyb70MiPZKssA8wrYz0,43
-crawlo/core/engine.py,sha256=
+crawlo/core/engine.py,sha256=SoTVS3F2EI1G_zQVe9UbeUz8cBhyVFlxJ-HuhPD3ock,6032
 crawlo/core/processor.py,sha256=oHLs-cno0bJGTNc9NGD2S7_2-grI3ruvggO0SY2mf3Q,1180
 crawlo/core/scheduler.py,sha256=ZMPs4LSs69FsFfDTvaOMJKqpSQQGvIEE9pMyYVVAA64,1948
 crawlo/downloader/__init__.py,sha256=72u2Hef4HaMfs9VCqEjbMtiaRXbaXmgNiJn6qy09LHs,2384
@@ -81,7 +81,7 @@ examples/gxb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 examples/gxb/items.py,sha256=3-1Lxpi7EqMzheDJoO0MPyHky5nHG_nqQGgKlm8y6mQ,989
 examples/gxb/run.py,sha256=9kJlR8f-tZ3BqP5PW7sCLTw6PAFWo3x4cG5lc-6GWqI,333
 examples/gxb/settings.py,sha256=_nbXj9HV2e0F6liUzK0ueygLcaMM_IUlkuwL6mJqUfc,2345
-examples/gxb/spider/__init__.py,sha256=
+examples/gxb/spider/__init__.py,sha256=E5bYTAuqcy2KBgnZnZ7OoW7mE6YUIy2w748zCrE68nI,92
 examples/gxb/spider/miit_spider.py,sha256=tcQnuyUHfu-Re1QbKKSI9DXW3Sp1vyBW8qBzKLf_RC4,6666
 examples/gxb/spider/telecom_device.py,sha256=58iG6BQtQjjDHOF7-DXH0u5_XnppP5AJTQwaVJVyBEo,4929
 tests/__init__.py,sha256=409aRX8hsPffiZCVjOogtxwhACzBp8G2UTJyUQSxhK0,136
@@ -90,8 +90,8 @@ tests/test_proxy_middleware_integration.py,sha256=mTPK_XvbmLCV_QoVZzA3ybWOOX6149
 tests/test_proxy_providers.py,sha256=u_R2fhab90vqvQEaOAztpAOe9tJXvUMIdoDxmStmXJ4,1749
 tests/test_proxy_stats.py,sha256=ES00CEoDITYPFBGPk8pecFzD3ItYIv6NSpcqNd8-kvo,526
 tests/test_proxy_strategies.py,sha256=9Z1pXmTNyw-eIhGXlf2abZbJx6igLohYq-_3hldQ5uE,1868
-crawlo-1.0.8.dist-info/METADATA,sha256=
-crawlo-1.0.8.dist-info/WHEEL,sha256=
-crawlo-1.0.8.dist-info/entry_points.txt,sha256=
-crawlo-1.0.8.dist-info/top_level.txt,sha256=
-crawlo-1.0.8.dist-info/RECORD,,
+crawlo-1.1.0.dist-info/METADATA,sha256=WTcM-8FqMpTLvIAPGvruLzPCBZnY3ODYklhnv7eVS70,1825
+crawlo-1.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+crawlo-1.1.0.dist-info/entry_points.txt,sha256=5HoVoTSPxI8SCa5B7pQYxLSrkOdiunyO9tqNsLMv52g,43
+crawlo-1.1.0.dist-info/top_level.txt,sha256=keG_67pbZ_wZL2dmDRA9RMaNHTaV_x_oxZ9DKNgwvR0,22
+crawlo-1.1.0.dist-info/RECORD,,
examples/gxb/spider/__init__.py
CHANGED
{crawlo-1.0.8.dist-info → crawlo-1.1.0.dist-info}/WHEEL
File without changes
{crawlo-1.0.8.dist-info → crawlo-1.1.0.dist-info}/entry_points.txt
File without changes
{crawlo-1.0.8.dist-info → crawlo-1.1.0.dist-info}/top_level.txt
File without changes