crawlo 1.1.1-py3-none-any.whl → 1.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of crawlo might be problematic.

Files changed (68)
  1. crawlo/__init__.py +2 -1
  2. crawlo/__version__.py +1 -1
  3. crawlo/commands/genspider.py +68 -42
  4. crawlo/commands/list.py +102 -93
  5. crawlo/commands/startproject.py +89 -4
  6. crawlo/commands/utils.py +187 -0
  7. crawlo/config.py +280 -0
  8. crawlo/core/engine.py +16 -3
  9. crawlo/core/enhanced_engine.py +190 -0
  10. crawlo/core/scheduler.py +113 -8
  11. crawlo/crawler.py +840 -307
  12. crawlo/downloader/__init__.py +181 -17
  13. crawlo/downloader/aiohttp_downloader.py +15 -2
  14. crawlo/downloader/cffi_downloader.py +11 -1
  15. crawlo/downloader/httpx_downloader.py +14 -3
  16. crawlo/filters/__init__.py +122 -5
  17. crawlo/filters/aioredis_filter.py +128 -36
  18. crawlo/filters/memory_filter.py +99 -32
  19. crawlo/middleware/proxy.py +11 -8
  20. crawlo/middleware/retry.py +40 -5
  21. crawlo/mode_manager.py +201 -0
  22. crawlo/network/__init__.py +17 -3
  23. crawlo/network/request.py +118 -10
  24. crawlo/network/response.py +131 -28
  25. crawlo/pipelines/__init__.py +1 -1
  26. crawlo/pipelines/csv_pipeline.py +317 -0
  27. crawlo/pipelines/json_pipeline.py +219 -0
  28. crawlo/queue/__init__.py +0 -0
  29. crawlo/queue/pqueue.py +37 -0
  30. crawlo/queue/queue_manager.py +304 -0
  31. crawlo/queue/redis_priority_queue.py +192 -0
  32. crawlo/settings/default_settings.py +68 -9
  33. crawlo/spider/__init__.py +576 -66
  34. crawlo/task_manager.py +4 -1
  35. crawlo/templates/project/middlewares.py.tmpl +56 -45
  36. crawlo/templates/project/pipelines.py.tmpl +308 -36
  37. crawlo/templates/project/run.py.tmpl +239 -0
  38. crawlo/templates/project/settings.py.tmpl +211 -17
  39. crawlo/templates/spider/spider.py.tmpl +153 -7
  40. crawlo/utils/controlled_spider_mixin.py +336 -0
  41. crawlo/utils/large_scale_config.py +287 -0
  42. crawlo/utils/large_scale_helper.py +344 -0
  43. crawlo/utils/queue_helper.py +176 -0
  44. crawlo/utils/request_serializer.py +220 -0
  45. crawlo-1.1.2.dist-info/METADATA +567 -0
  46. {crawlo-1.1.1.dist-info → crawlo-1.1.2.dist-info}/RECORD +54 -46
  47. tests/test_final_validation.py +154 -0
  48. tests/test_redis_config.py +29 -0
  49. tests/test_redis_queue.py +225 -0
  50. tests/test_request_serialization.py +71 -0
  51. tests/test_scheduler.py +242 -0
  52. crawlo/pipelines/mysql_batch_pipline.py +0 -273
  53. crawlo/utils/pqueue.py +0 -174
  54. crawlo-1.1.1.dist-info/METADATA +0 -220
  55. examples/baidu_spider/__init__.py +0 -7
  56. examples/baidu_spider/demo.py +0 -94
  57. examples/baidu_spider/items.py +0 -46
  58. examples/baidu_spider/middleware.py +0 -49
  59. examples/baidu_spider/pipeline.py +0 -55
  60. examples/baidu_spider/run.py +0 -27
  61. examples/baidu_spider/settings.py +0 -121
  62. examples/baidu_spider/spiders/__init__.py +0 -7
  63. examples/baidu_spider/spiders/bai_du.py +0 -61
  64. examples/baidu_spider/spiders/miit.py +0 -159
  65. examples/baidu_spider/spiders/sina.py +0 -79
  66. {crawlo-1.1.1.dist-info → crawlo-1.1.2.dist-info}/WHEEL +0 -0
  67. {crawlo-1.1.1.dist-info → crawlo-1.1.2.dist-info}/entry_points.txt +0 -0
  68. {crawlo-1.1.1.dist-info → crawlo-1.1.2.dist-info}/top_level.txt +0 -0
crawlo/__init__.py CHANGED
@@ -4,7 +4,7 @@
 Crawlo - an asynchronous crawler framework
 """
 from crawlo.spider import Spider
-from crawlo.items.items import Item
+from crawlo.items import Item, Field
 from crawlo.network.request import Request
 from crawlo.network.response import Response
 from crawlo.downloader import DownloaderBase
@@ -26,6 +26,7 @@ except Exception:
 __all__ = [
     'Spider',
     'Item',
+    'Field',
     'Request',
     'Response',
     'DownloaderBase',
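
With Field now exported from the package root alongside Item, item definitions need only a single top-level import. A minimal sketch of what this enables (the ArticleItem class and its fields are illustrative; the exact Field semantics live in crawlo/items, which is not expanded in this diff):

from crawlo import Item, Field

class ArticleItem(Item):
    # hypothetical fields, for illustration only
    title = Field()
    url = Field()
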
crawlo/__version__.py CHANGED
@@ -1 +1 @@
-__version__ = "1.1.1"
+__version__ = "1.1.2"
crawlo/commands/genspider.py CHANGED
@@ -11,6 +11,15 @@ import configparser
 import importlib
 from rich.console import Console
 
+from .utils import (
+    get_project_root,
+    validate_project_environment,
+    show_error_panel,
+    show_success_panel,
+    validate_spider_name,
+    is_valid_domain
+)
+
 # Initialize the rich console
 console = Console()
 
@@ -29,41 +38,42 @@ def _render_template(tmpl_path, context):
 def main(args):
     if len(args) < 2:
         console.print("[bold red]Error:[/bold red] Usage: [blue]crawlo genspider[/blue] <spider_name> <domain>")
+        console.print("💡 Examples:")
+        console.print("   [blue]crawlo genspider[/blue] news_spider news.example.com")
+        console.print("   [blue]crawlo genspider[/blue] product_spider shop.example.com")
         return 1
 
     spider_name = args[0]
     domain = args[1]
-
-    # Find the project root directory
-    project_root = None
-    current = Path.cwd()
-    while True:
-        cfg_file = current / 'crawlo.cfg'
-        if cfg_file.exists():
-            project_root = current
-            break
-        parent = current.parent
-        if parent == current:
-            break
-        current = parent
-
-    if not project_root:
-        console.print("[bold red]:cross_mark: Error:[/bold red] Not a crawlo project. [cyan]crawlo.cfg[/cyan] not found.")
+
+    # Validate the spider name
+    if not validate_spider_name(spider_name):
+        show_error_panel(
+            "Invalid Spider Name",
+            f"Spider name '[cyan]{spider_name}[/cyan]' is invalid.\n"
+            "💡 Spider name should:\n"
+            "   • Start with lowercase letter\n"
+            "   • Contain only lowercase letters, numbers, and underscores\n"
+            "   • Be a valid Python identifier"
+        )
+        return 1
+
+    # Validate the domain format
+    if not is_valid_domain(domain):
+        show_error_panel(
+            "Invalid Domain",
+            f"Domain '[cyan]{domain}[/cyan]' format is invalid.\n"
+            "💡 Please provide a valid domain name like 'example.com'"
+        )
         return 1
 
-    # Add the project root to sys.path
-    if str(project_root) not in sys.path:
-        sys.path.insert(0, str(project_root))
-
-    # Read the settings module from crawlo.cfg to get the project package name
-    config = configparser.ConfigParser()
-    try:
-        config.read(cfg_file, encoding='utf-8')
-        settings_module = config.get('settings', 'default')
-        project_package = settings_module.split('.')[0]  # e.g., myproject.settings -> myproject
-    except Exception as e:
-        console.print(f"[bold red]:cross_mark: Error reading crawlo.cfg:[/bold red] {e}")
+    # Validate the project environment
+    is_valid, project_package, error_msg = validate_project_environment()
+    if not is_valid:
+        show_error_panel("Not a Crawlo Project", error_msg)
         return 1
+
+    project_root = get_project_root()
 
     # Determine the path of the items module
     items_module_path = f"{project_package}.items"
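
The validate_spider_name and is_valid_domain helpers come from the new crawlo/commands/utils.py (+187 lines, not expanded in this diff). Judging only from the error messages in the hunk above, the checks they are expected to perform look roughly like the following sketch (illustrative, not the shipped implementation):

import re

def validate_spider_name(name: str) -> bool:
    # lowercase start; only lowercase letters, digits, and underscores; valid identifier
    return name.isidentifier() and bool(re.match(r'^[a-z][a-z0-9_]*$', name))

def is_valid_domain(domain: str) -> bool:
    # loose shape check for something like "example.com" or "news.example.com"
    return bool(re.match(r'^([a-z0-9-]+\.)+[a-z]{2,}$', domain, re.IGNORECASE))
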
@@ -93,17 +103,23 @@ def main(args):
 
     spider_file = spiders_dir / f'{spider_name}.py'
     if spider_file.exists():
-        console.print(f"[bold red]:cross_mark: Error:[/bold red] Spider '[cyan]{spider_name}[/cyan]' already exists at [green]{spider_file}[/green]")
+        show_error_panel(
+            "Spider Already Exists",
+            f"Spider '[cyan]{spider_name}[/cyan]' already exists at\n[green]{spider_file}[/green]"
+        )
         return 1
 
     # Template path
     tmpl_path = TEMPLATES_DIR / 'spider' / 'spider.py.tmpl'
     if not tmpl_path.exists():
-        console.print(f"[bold red]:cross_mark: Error:[/bold red] Template file not found at [cyan]{tmpl_path}[/cyan]")
+        show_error_panel(
+            "Template Not Found",
+            f"Template file not found at [cyan]{tmpl_path}[/cyan]"
+        )
         return 1
 
     # Generate the class name
-    class_name = f"{spider_name.capitalize()}Spider"
+    class_name = f"{spider_name.replace('_', '').capitalize()}Spider"
 
     context = {
         'spider_name': spider_name,
@@ -113,14 +129,24 @@ def main(args):
         'class_name': class_name
     }
 
-    content = _render_template(tmpl_path, context)
-
-    with open(spider_file, 'w', encoding='utf-8') as f:
-        f.write(content)
-
-    console.print(f":white_check_mark: [green]Spider '[bold]{spider_name}[/bold]' created successfully![/green]")
-    console.print(f"   → Location: [cyan]{spider_file}[/cyan]")
-    console.print("\n[bold]Next step:[/bold]")
-    console.print(f"   [blue]crawlo run[/blue] {spider_name}")
-
-    return 0
+    try:
+        content = _render_template(tmpl_path, context)
+        with open(spider_file, 'w', encoding='utf-8') as f:
+            f.write(content)
+
+        console.print(f":white_check_mark: [green]Spider '[bold]{spider_name}[/bold]' created successfully![/green]")
+        console.print(f"   → Location: [cyan]{spider_file}[/cyan]")
+        console.print(f"   Class: [yellow]{class_name}[/yellow]")
+        console.print(f"   → Domain: [blue]{domain}[/blue]")
+        console.print("\n[bold]Next steps:[/bold]")
+        console.print(f"   [blue]crawlo run[/blue] {spider_name}")
+        console.print(f"   [blue]crawlo check[/blue] {spider_name}")
+
+        return 0
+
+    except Exception as e:
+        show_error_panel(
+            "Creation Failed",
+            f"Failed to create spider: {e}"
+        )
+        return 1
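
Note the behavioural change in class-name generation: underscores are now stripped before capitalising, so snake_case spider names no longer leak underscores into the class name. For example:

spider_name = "news_spider"
class_name = f"{spider_name.replace('_', '').capitalize()}Spider"
print(class_name)  # NewsspiderSpider (1.1.1 would have produced News_spiderSpider)
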
crawlo/commands/list.py CHANGED
@@ -6,7 +6,6 @@
 # @Desc   : Command-line entry: crawlo list, lists all registered spiders
 """
 import sys
-import configparser
 from pathlib import Path
 from importlib import import_module
 
@@ -18,110 +17,96 @@ from rich import box
 
 from crawlo.crawler import CrawlerProcess
 from crawlo.utils.log import get_logger
+from .utils import validate_project_environment, show_error_panel
 
 logger = get_logger(__name__)
 console = Console()
 
 
-def get_project_root():
-    """
-    Auto-detect the project root: walk upward from the current directory looking for crawlo.cfg.
-    Returns the directory path (string) when found; searches at most 10 levels up.
-    """
-    current = Path.cwd()
-    for _ in range(10):
-        cfg = current / "crawlo.cfg"
-        if cfg.exists():
-            return str(current)
-        if current == current.parent:
-            break
-        current = current.parent
-    return None  # not found
-
-
 def main(args):
     """
     Main function: list all available spiders.
-    Usage: crawlo list
+    Usage: crawlo list [--json]
     """
-    if args:
-        console.print("[bold red]❌ Error:[/bold red] Usage: [blue]crawlo list[/blue]")
+    show_json = "--json" in args
+
+    # After filtering out flags, check for extra positional arguments
+    filtered_args = [arg for arg in args if not arg.startswith('--')]
+    if filtered_args:
+        if show_json:
+            console.print_json(data={"success": False, "error": "Usage: crawlo list [--json]"})
+        else:
+            console.print("[bold red]❌ Error:[/bold red] Usage: [blue]crawlo list[/blue] [--json]")
         return 1
 
     try:
-        # 1. Find the project root directory
-        project_root = get_project_root()
-        if not project_root:
-            console.print(Panel(
-                Text.from_markup(
-                    ":cross_mark: [bold red]Cannot find 'crawlo.cfg'[/bold red]\n"
-                    "💡 Run this command inside your project directory.\n"
-                    "🚀 Or create a new project with:\n"
-                    "   [blue]crawlo startproject myproject[/blue]"
-                ),
-                title="❌ Not in a Crawlo Project",
-                border_style="red",
-                padding=(1, 2)
-            ))
-            return 1
-
-        project_root_path = Path(project_root)
-        project_root_str = str(project_root_path)
-
-        # 2. Add the project root to the Python path
-        if project_root_str not in sys.path:
-            sys.path.insert(0, project_root_str)
-
-        # 3. Read crawlo.cfg to get the settings module
-        cfg_file = project_root_path / "crawlo.cfg"
-        config = configparser.ConfigParser()
-        config.read(cfg_file, encoding="utf-8")
-
-        if not config.has_section("settings") or not config.has_option("settings", "default"):
-            console.print(Panel(
-                ":cross_mark: [bold red]Invalid crawlo.cfg[/bold red]\n"
-                "Missing [settings] section or 'default' option.",
-                title="❌ Config Error",
-                border_style="red"
-            ))
-            return 1
-
-        settings_module = config.get("settings", "default")
-        project_package = settings_module.split(".")[0]
-
-        # 4. Make sure the project package is importable
-        try:
-            import_module(project_package)
-        except ImportError as e:
-            console.print(Panel(
-                f":cross_mark: Failed to import project package '[cyan]{project_package}[/cyan]':\n{e}",
-                title="❌ Import Error",
-                border_style="red"
-            ))
+        # Validate the project environment
+        is_valid, project_package, error_msg = validate_project_environment()
+        if not is_valid:
+            if show_json:
+                console.print_json(data={"success": False, "error": error_msg})
+            else:
+                show_error_panel("Not a Crawlo Project", error_msg)
             return 1
 
-        # 5. Initialize CrawlerProcess and load spider modules
+        # Initialize CrawlerProcess and load spider modules
         spider_modules = [f"{project_package}.spiders"]
         process = CrawlerProcess(spider_modules=spider_modules)
 
-        # 6. Collect all spider names
+        # Collect all spider names
        spider_names = process.get_spider_names()
        if not spider_names:
-            console.print(Panel(
-                Text.from_markup(
-                    ":envelope_with_arrow: [bold]No spiders found[/bold] in '[cyan]spiders/[/cyan]' directory.\n\n"
-                    "[bold]💡 Make sure:[/bold]\n"
-                    "   Spider classes inherit from [blue]`crawlo.spider.Spider`[/blue]\n"
-                    "   • Each spider has a [green]`name`[/green] attribute\n"
-                    "   • Spiders are imported in [cyan]`spiders/__init__.py`[/cyan] (if using package)"
-                ),
-                title="📭 No Spiders Found",
-                border_style="yellow",
-                padding=(1, 2)
-            ))
-            return 1
-
-        # 7. Output the spider list as a table
+            if show_json:
+                console.print_json(data={
+                    "success": True,
+                    "spiders": [],
+                    "message": "No spiders found in project"
+                })
+            else:
+                console.print(Panel(
+                    Text.from_markup(
+                        ":envelope_with_arrow: [bold]No spiders found[/bold] in '[cyan]spiders/[/cyan]' directory.\n\n"
+                        "[bold]💡 Make sure:[/bold]\n"
+                        "   • Spider classes inherit from [blue]`crawlo.spider.Spider`[/blue]\n"
+                        "   • Each spider has a [green]`name`[/green] attribute\n"
+                        "   • Spiders are imported in [cyan]`spiders/__init__.py`[/cyan] (if using package)"
+                    ),
+                    title="📭 No Spiders Found",
+                    border_style="yellow",
+                    padding=(1, 2)
+                ))
+            return 0
+
+        # Assemble spider info
+        spider_info = []
+        for name in sorted(spider_names):
+            spider_cls = process.get_spider_class(name)
+            module_name = spider_cls.__module__.replace(f"{project_package}.", "")
+
+            # Gather extra details
+            start_urls_count = len(getattr(spider_cls, 'start_urls', []))
+            allowed_domains = getattr(spider_cls, 'allowed_domains', [])
+            custom_settings = getattr(spider_cls, 'custom_settings', {})
+
+            spider_info.append({
+                "name": name,
+                "class": spider_cls.__name__,
+                "module": module_name,
+                "start_urls_count": start_urls_count,
+                "allowed_domains": allowed_domains,
+                "has_custom_settings": bool(custom_settings)
+            })
+
+        # JSON output
+        if show_json:
+            console.print_json(data={
+                "success": True,
+                "count": len(spider_info),
+                "spiders": spider_info
+            })
+            return 0
+
+        # Table output
         table = Table(
             title=f"📋 Found {len(spider_names)} spider(s)",
             box=box.ROUNDED,
@@ -132,16 +117,40 @@ def main(args):
         table.add_column("Name", style="cyan", no_wrap=True)
         table.add_column("Class", style="green")
         table.add_column("Module", style="dim")
-
-        for name in sorted(spider_names):
-            spider_cls = process.get_spider_class(name)
-            module_name = spider_cls.__module__.replace(f"{project_package}.", "")
-            table.add_row(name, spider_cls.__name__, module_name)
+        table.add_column("URLs", style="blue", justify="center")
+        table.add_column("Domains", style="yellow")
+        table.add_column("Custom Settings", style="magenta", justify="center")
+
+        for info in spider_info:
+            domains_display = ", ".join(info["allowed_domains"][:2])  # show the first 2 domains
+            if len(info["allowed_domains"]) > 2:
+                domains_display += f" (+{len(info['allowed_domains'])-2})"
+            elif not domains_display:
+                domains_display = "-"
+
+            table.add_row(
+                info["name"],
+                info["class"],
+                info["module"],
+                str(info["start_urls_count"]),
+                domains_display,
+                "✓" if info["has_custom_settings"] else "-"
+            )
 
         console.print(table)
+
+        # Show usage hints
+        console.print("\n[bold]🚀 Next steps:[/bold]")
+        console.print("   [blue]crawlo run[/blue] <spider_name>    # Run a specific spider")
+        console.print("   [blue]crawlo run[/blue] all              # Run all spiders")
+        console.print("   [blue]crawlo check[/blue] <spider_name>  # Check spider validity")
+
         return 0
 
     except Exception as e:
-        console.print(f"[bold red]❌ Unexpected error:[/bold red] {e}")
+        if show_json:
+            console.print_json(data={"success": False, "error": str(e)})
+        else:
+            console.print(f"[bold red]❌ Unexpected error:[/bold red] {e}")
         logger.exception("Exception during 'crawlo list'")
-        return 1
+        return 1
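
The new --json flag makes the listing machine-readable. Based on the spider_info dictionaries assembled above, the output has this shape (the spider values below are illustrative):

$ crawlo list --json
{
  "success": true,
  "count": 1,
  "spiders": [
    {
      "name": "example",
      "class": "ExampleSpider",
      "module": "spiders.example",
      "start_urls_count": 1,
      "allowed_domains": ["example.com"],
      "has_custom_settings": false
    }
  ]
}
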
crawlo/commands/startproject.py CHANGED
@@ -6,11 +6,14 @@
 # @Desc   : Command-line entry: crawlo startproject baidu, creates a project.
 """
 import shutil
+import re
 from pathlib import Path
 from rich.console import Console
 from rich.panel import Panel
 from rich.text import Text
 
+from .utils import show_error_panel, show_success_panel
+
 # Initialize the rich console
 console = Console()
 
@@ -51,16 +54,86 @@ def _copytree_with_templates(src, dst, context):
             shutil.copy2(item, dst_item)
 
 
+def validate_project_name(project_name: str) -> tuple[bool, str]:
+    """
+    Validate that the project name is acceptable.
+
+    Returns:
+        tuple[bool, str]: (is valid, error message)
+    """
+    # Check for an empty name
+    if not project_name or not project_name.strip():
+        return False, "Project name cannot be empty"
+
+    project_name = project_name.strip()
+
+    # Check length
+    if len(project_name) > 50:
+        return False, "Project name too long (max 50 characters)"
+
+    # Check against Python keywords
+    python_keywords = {
+        'False', 'None', 'True', 'and', 'as', 'assert', 'break', 'class',
+        'continue', 'def', 'del', 'elif', 'else', 'except', 'finally',
+        'for', 'from', 'global', 'if', 'import', 'in', 'is', 'lambda',
+        'nonlocal', 'not', 'or', 'pass', 'raise', 'return', 'try',
+        'while', 'with', 'yield'
+    }
+    if project_name in python_keywords:
+        return False, f"'{project_name}' is a Python keyword and cannot be used as project name"
+
+    # Check that it is a valid Python identifier
+    if not project_name.isidentifier():
+        return False, "Project name must be a valid Python identifier"
+
+    # Check the format (snake_case recommended)
+    if not re.match(r'^[a-z][a-z0-9_]*$', project_name):
+        return False, (
+            "Project name should start with lowercase letter and "
+            "contain only lowercase letters, numbers, and underscores"
+        )
+
+    # Check that it does not end with a digit (not recommended)
+    if project_name[-1].isdigit():
+        return False, "Project name should not end with a number"
+
+    return True, ""
+
+
 def main(args):
     if len(args) != 1:
-        console.print("[bold red]Error:[/bold red] Usage: crawlo startproject <project_name>")
+        console.print("[bold red]Error:[/bold red] Usage: [blue]crawlo startproject[/blue] <project_name>")
+        console.print("💡 Examples:")
+        console.print("   [blue]crawlo startproject[/blue] my_spider_project")
+        console.print("   [blue]crawlo startproject[/blue] news_crawler")
+        console.print("   [blue]crawlo startproject[/blue] ecommerce_spider")
         return 1
 
     project_name = args[0]
+
+    # Validate the project name
+    is_valid, error_msg = validate_project_name(project_name)
+    if not is_valid:
+        show_error_panel(
+            "Invalid Project Name",
+            f"[cyan]{project_name}[/cyan] is not a valid project name.\n"
+            f"❌ {error_msg}\n\n"
+            "💡 Project name should:\n"
+            "   • Start with lowercase letter\n"
+            "   • Contain only lowercase letters, numbers, and underscores\n"
+            "   • Be a valid Python identifier\n"
+            "   • Not be a Python keyword"
+        )
+        return 1
+
     project_dir = Path(project_name)
 
     if project_dir.exists():
-        console.print(f"[bold red]Error:[/bold red] Directory '[cyan]{project_dir}[/cyan]' already exists.")
+        show_error_panel(
+            "Directory Exists",
+            f"Directory '[cyan]{project_dir}[/cyan]' already exists.\n"
+            "💡 Choose a different project name or remove the existing directory."
+        )
         return 1
 
     context = {'project_name': project_name}
@@ -87,6 +160,10 @@ def main(args):
         # 4. Create the logs directory
         (project_dir / 'logs').mkdir(exist_ok=True)
         console.print(":white_check_mark: Created logs directory")
+
+        # 5. Create the output directory (for data output)
+        (project_dir / 'output').mkdir(exist_ok=True)
+        console.print(":white_check_mark: Created output directory")
 
         # Success panel
         success_text = Text.from_markup(f"Project '[bold cyan]{project_name}[/bold cyan]' created successfully!")
@@ -94,17 +171,25 @@ def main(args):
 
         # Next-step hints (aligned layout + syntax highlighting)
         next_steps = f"""
-    [bold]Next steps:[/bold]
+    [bold]🚀 Next steps:[/bold]
         [blue]cd[/blue] {project_name}
         [blue]crawlo genspider[/blue] example example.com
         [blue]crawlo run[/blue] example
+
+    [bold]📚 Learn more:[/bold]
+        [blue]crawlo list[/blue]              # List all spiders
+        [blue]crawlo check[/blue] example     # Check spider validity
+        [blue]crawlo stats[/blue]             # View statistics
         """.strip()
         console.print(next_steps)
 
         return 0
 
     except Exception as e:
-        console.print(f"[bold red]Error creating project:[/bold red] {e}")
+        show_error_panel(
+            "Creation Failed",
+            f"Failed to create project: {e}"
+        )
         if project_dir.exists():
             shutil.rmtree(project_dir, ignore_errors=True)
             console.print("[red]:cross_mark: Cleaned up partially created project.[/red]")