crawlo-1.2.1-py3-none-any.whl → crawlo-1.2.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of crawlo might be problematic.

Files changed (49)
  1. crawlo/__version__.py +1 -1
  2. crawlo/cli.py +16 -0
  3. crawlo/commands/check.py +69 -69
  4. crawlo/commands/genspider.py +25 -25
  5. crawlo/commands/help.py +4 -4
  6. crawlo/commands/list.py +23 -23
  7. crawlo/commands/run.py +34 -34
  8. crawlo/commands/startproject.py +35 -36
  9. crawlo/commands/stats.py +21 -21
  10. crawlo/commands/utils.py +4 -4
  11. crawlo/config.py +4 -4
  12. crawlo/config_validator.py +1 -2
  13. crawlo/data/__init__.py +6 -0
  14. crawlo/data/user_agents.py +108 -0
  15. crawlo/downloader/hybrid_downloader.py +0 -1
  16. crawlo/downloader/selenium_downloader.py +1 -1
  17. crawlo/extension/health_check.py +2 -2
  18. crawlo/items/fields.py +0 -1
  19. crawlo/middleware/offsite.py +0 -1
  20. crawlo/middleware/proxy.py +3 -2
  21. crawlo/middleware/request_ignore.py +0 -1
  22. crawlo/middleware/response_code.py +0 -1
  23. crawlo/middleware/response_filter.py +0 -1
  24. crawlo/middleware/retry.py +1 -1
  25. crawlo/mode_manager.py +3 -3
  26. crawlo/pipelines/database_dedup_pipeline.py +1 -3
  27. crawlo/pipelines/memory_dedup_pipeline.py +2 -2
  28. crawlo/pipelines/mysql_pipeline.py +4 -3
  29. crawlo/pipelines/redis_dedup_pipeline.py +2 -4
  30. crawlo/project.py +2 -2
  31. crawlo/subscriber.py +1 -2
  32. crawlo/templates/project/settings.py.tmpl +1 -3
  33. crawlo/templates/project/settings_distributed.py.tmpl +2 -0
  34. crawlo/utils/batch_processor.py +2 -3
  35. crawlo/utils/controlled_spider_mixin.py +1 -1
  36. crawlo/utils/enhanced_error_handler.py +3 -6
  37. crawlo/utils/env_config.py +1 -1
  38. crawlo/utils/error_handler.py +2 -4
  39. crawlo/utils/large_scale_helper.py +2 -1
  40. crawlo/utils/performance_monitor.py +5 -4
  41. crawlo/utils/redis_connection_pool.py +4 -4
  42. crawlo/utils/redis_key_validator.py +1 -2
  43. crawlo/utils/request_serializer.py +1 -2
  44. crawlo/utils/spider_loader.py +0 -1
  45. {crawlo-1.2.1.dist-info → crawlo-1.2.3.dist-info}/METADATA +1 -1
  46. {crawlo-1.2.1.dist-info → crawlo-1.2.3.dist-info}/RECORD +49 -47
  47. {crawlo-1.2.1.dist-info → crawlo-1.2.3.dist-info}/WHEEL +0 -0
  48. {crawlo-1.2.1.dist-info → crawlo-1.2.3.dist-info}/entry_points.txt +0 -0
  49. {crawlo-1.2.1.dist-info → crawlo-1.2.3.dist-info}/top_level.txt +0 -0
crawlo/__version__.py CHANGED
@@ -1 +1 @@
- __version__ = "1.2.1"
+ __version__ = "1.2.3"
crawlo/cli.py CHANGED
@@ -3,10 +3,20 @@
  # -*- coding: UTF-8 -*-
  import sys
  import argparse
+ import os
  from crawlo.commands import get_commands


  def main():
+     # 获取框架版本号
+     version_file = os.path.join(os.path.dirname(__file__), '__version__.py')
+     if os.path.exists(version_file):
+         with open(version_file, 'r') as f:
+             exec(f.read())
+         VERSION = locals().get('__version__', '1.0.0')
+     else:
+         VERSION = '1.0.0'
+
      # 获取所有可用命令
      commands = get_commands()

@@ -19,11 +29,17 @@ def main():

      # 添加帮助参数
      parser.add_argument('-h', '--help', action='store_true', help='显示帮助信息')
+     parser.add_argument('-v', '--version', action='store_true', help='显示版本信息')
      parser.add_argument('command', nargs='?', help='可用命令: ' + ', '.join(commands.keys()))

      # 解析已知参数
      args, unknown = parser.parse_known_args()

+     # 处理版本参数
+     if args.version:
+         print(f"Crawlo {VERSION}")
+         sys.exit(0)
+
      # 处理帮助参数
      if args.help or (args.command is None and not unknown):
          # 导入并运行帮助命令
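The version handling added above reads crawlo/__version__.py with exec() and falls back to '1.0.0'. Below is a minimal standalone sketch of the same idea, included only for reference; it uses an explicit namespace dict rather than locals(), and the helper name and __main__ usage are illustrative, not part of crawlo.

import os

def read_version(package_dir: str, default: str = "1.0.0") -> str:
    """Read __version__ from <package_dir>/__version__.py without importing the package."""
    version_file = os.path.join(package_dir, "__version__.py")
    if not os.path.exists(version_file):
        return default
    namespace = {}
    with open(version_file, "r", encoding="utf-8") as f:
        exec(f.read(), namespace)  # executes e.g. __version__ = "1.2.3" into the dict
    return namespace.get("__version__", default)

if __name__ == "__main__":
    # Example: read a __version__.py sitting next to this script, if one exists.
    print(read_version(os.path.dirname(os.path.abspath(__file__))))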
crawlo/commands/check.py CHANGED
@@ -63,7 +63,7 @@ def auto_fix_spider_file(spider_cls, file_path: Path):
              break

      if not class_node:
-         return False, "Could not find class definition in file."
+         return False, "在文件中找不到类定义。"

      # 1. 修复 name 为空或缺失
      name_assign = None
@@ -221,12 +221,12 @@ def auto_fix_spider_file(spider_cls, file_path: Path):
              fixed_source = astor.to_source(tree)
              with open(file_path, "w", encoding="utf-8") as f:
                  f.write(fixed_source)
-             return True, "File auto-fixed successfully."
+             return True, "文件自动修复成功。"
          else:
-             return False, "No fixable issues found."
+             return False, "未找到可修复的问题。"

      except Exception as e:
-         return False, f"Failed to auto-fix: {e}"
+         return False, f"自动修复失败: {e}"


  class SpiderChangeHandler(FileSystemEventHandler):
@@ -242,14 +242,14 @@ class SpiderChangeHandler(FileSystemEventHandler):
          if event.src_path.endswith(".py") and "spiders" in event.src_path:
              file_path = Path(event.src_path)
              spider_name = file_path.stem
-             self.console.print(f"\n:eyes: [bold blue]Detected change in[/bold blue] [cyan]{file_path}[/cyan]")
+             self.console.print(f"\n:eyes: [bold blue]检测到变更[/bold blue] [cyan]{file_path}[/cyan]")
              self.check_and_fix_spider(spider_name)

      def check_and_fix_spider(self, spider_name):
          try:
              process = CrawlerProcess(spider_modules=self.spider_modules)
              if spider_name not in process.get_spider_names():
-                 self.console.print(f"[yellow]⚠️ {spider_name} is not a registered spider.[/yellow]")
+                 self.console.print(f"[yellow]⚠️ {spider_name} 不是已注册的爬虫。[/yellow]")
                  return

              cls = process.get_spider_class(spider_name)
@@ -257,23 +257,23 @@ class SpiderChangeHandler(FileSystemEventHandler):

              # 简化检查
              if not getattr(cls, "name", None):
-                 issues.append("missing or empty 'name' attribute")
+                 issues.append("缺少或为空的 'name' 属性")
              if not callable(getattr(cls, "start_requests", None)):
-                 issues.append("missing 'start_requests' method")
+                 issues.append("缺少 'start_requests' 方法")
              if hasattr(cls, "start_urls") and isinstance(cls.start_urls, str):
-                 issues.append("'start_urls' is string")
+                 issues.append("'start_urls' 是字符串")
              if hasattr(cls, "allowed_domains") and isinstance(cls.allowed_domains, str):
-                 issues.append("'allowed_domains' is string")
+                 issues.append("'allowed_domains' 是字符串")

              try:
                  spider = cls.create_instance(None)
                  if not callable(getattr(spider, "parse", None)):
-                     issues.append("no 'parse' method")
+                     issues.append("缺少 'parse' 方法")
              except Exception:
-                 issues.append("failed to instantiate")
+                 issues.append("实例化失败")

              if issues:
-                 self.console.print(f"[red]❌ {spider_name} has issues:[/red]")
+                 self.console.print(f"[red]❌ {spider_name} 存在问题:[/red]")
                  for issue in issues:
                      self.console.print(f" • {issue}")

@@ -281,21 +281,21 @@ class SpiderChangeHandler(FileSystemEventHandler):
                  file_path = Path(cls.__file__)
                  fixed, msg = auto_fix_spider_file(cls, file_path)
                  if fixed:
-                     self.console.print(f"[green]✅ Auto-fixed: {msg}[/green]")
+                     self.console.print(f"[green]✅ 自动修复: {msg}[/green]")
                  else:
-                     self.console.print(f"[yellow]⚠️ Could not fix: {msg}[/yellow]")
+                     self.console.print(f"[yellow]⚠️ 无法修复: {msg}[/yellow]")
              else:
-                 self.console.print(f"[green]✅ {spider_name} is compliant.[/green]")
+                 self.console.print(f"[green]✅ {spider_name} 合规。[/green]")

          except Exception as e:
-             self.console.print(f"[red]❌ Error checking {spider_name}: {e}[/red]")
+             self.console.print(f"[red]❌ 检查 {spider_name} 时出错: {e}[/red]")


- def watch_spiders(project_root, project_package, show_fix=False):
-     console = Console()
+ def watch_spiders(project_root: Path, project_package: str, show_fix: bool):
+     """监听 spiders 目录变化并自动检查"""
      spider_path = project_root / project_package / "spiders"
      if not spider_path.exists():
-         console.print(f"[red]❌ Spiders directory not found: {spider_path}[/red]")
+         console.print(f"[bold red]❌ Spider 目录未找到:[/bold red] {spider_path}")
          return

      spider_modules = [f"{project_package}.spiders"]
@@ -304,9 +304,9 @@ def watch_spiders(project_root, project_package, show_fix=False):
      observer.schedule(event_handler, str(spider_path), recursive=False)

      console.print(Panel(
-         f":eyes: [bold blue]Watching for changes in[/bold blue] [cyan]{spider_path}[/cyan]\n"
-         "Edit any spider file to trigger auto-check...",
-         title="🚀 Watch Mode Started",
+         f":eyes: [bold blue]监听[/bold blue] [cyan]{spider_path}[/cyan] 中的变更\n"
+         "编辑任何爬虫文件以触发自动检查...",
+         title="🚀 已启动监听模式",
          border_style="blue"
      ))

@@ -315,7 +315,7 @@ def watch_spiders(project_root, project_package, show_fix=False):
          while True:
              time.sleep(1)
      except KeyboardInterrupt:
-         console.print("\n[bold red]🛑 Watch mode stopped.[/bold red]")
+         console.print("\n[bold red]🛑 监听模式已停止。[/bold red]")
          observer.stop()
          observer.join()

@@ -337,24 +337,24 @@ def main(args):

      valid_args = {"--fix", "-f", "--ci", "--json", "--watch"}
      if any(arg not in valid_args for arg in args):
-         console.print("[bold red]❌ Error:[/bold red] Usage: [blue]crawlo check[/blue] [--fix] [--ci] [--json] [--watch]")
+         console.print("[bold red]❌ 错误:[/bold red] 用法: [blue]crawlo check[/blue] [--fix] [--ci] [--json] [--watch]")
          return 1

      try:
          # 1. 查找项目根目录
          project_root = get_project_root()
          if not project_root:
-             msg = ":cross_mark: [bold red]Cannot find 'crawlo.cfg'[/bold red]\n💡 Run this command inside your project directory."
+             msg = ":cross_mark: [bold red]找不到 'crawlo.cfg'[/bold red]\n💡 请在项目目录中运行此命令。"
              if show_json:
-                 console.print_json(data={"success": False, "error": "Project root not found"})
+                 console.print_json(data={"success": False, "error": "未找到项目根目录"})
                  return 1
              elif show_ci:
-                 console.print("❌ Project root not found. crawlo.cfg missing.")
+                 console.print("❌ 未找到项目根目录。缺少 crawlo.cfg")
                  return 1
              else:
                  console.print(Panel(
                      Text.from_markup(msg),
-                     title="❌ Not in a Crawlo Project",
+                     title="❌ Crawlo项目",
                      border_style="red",
                      padding=(1, 2)
                  ))
@@ -367,7 +367,7 @@ def main(args):
          # 2. 读取 crawlo.cfg
          cfg_file = project_root / "crawlo.cfg"
          if not cfg_file.exists():
-             msg = f"Config file not found: {cfg_file}"
+             msg = f"配置文件未找到: {cfg_file}"
              if show_json:
                  console.print_json(data={"success": False, "error": msg})
                  return 1
@@ -375,14 +375,14 @@ def main(args):
                  console.print(f"❌ {msg}")
                  return 1
              else:
-                 console.print(Panel(msg, title="❌ Missing Config", border_style="red"))
+                 console.print(Panel(msg, title="❌ 缺少配置文件", border_style="red"))
                  return 1

          config = configparser.ConfigParser()
          config.read(cfg_file, encoding="utf-8")

          if not config.has_section("settings") or not config.has_option("settings", "default"):
-             msg = "Missing [settings] section or 'default' option in crawlo.cfg"
+             msg = "crawlo.cfg 中缺少 [settings] 部分或 'default' 选项"
              if show_json:
                  console.print_json(data={"success": False, "error": msg})
                  return 1
@@ -390,7 +390,7 @@ def main(args):
                  console.print(f"❌ {msg}")
                  return 1
              else:
-                 console.print(Panel(msg, title="❌ Invalid Config", border_style="red"))
+                 console.print(Panel(msg, title="❌ 无效配置", border_style="red"))
                  return 1

          settings_module = config.get("settings", "default")
@@ -400,7 +400,7 @@ def main(args):
          try:
              import_module(project_package)
          except ImportError as e:
-             msg = f"Failed to import project package '{project_package}': {e}"
+             msg = f"导入项目包 '{project_package}' 失败: {e}"
              if show_json:
                  console.print_json(data={"success": False, "error": msg})
                  return 1
@@ -408,7 +408,7 @@ def main(args):
                  console.print(f"❌ {msg}")
                  return 1
              else:
-                 console.print(Panel(msg, title="❌ Import Error", border_style="red"))
+                 console.print(Panel(msg, title="❌ 导入错误", border_style="red"))
                  return 1

          # 4. 加载爬虫
@@ -417,23 +417,23 @@ def main(args):
          spider_names = process.get_spider_names()

          if not spider_names:
-             msg = "No spiders found."
+             msg = "未找到爬虫。"
              if show_json:
                  console.print_json(data={"success": True, "warning": msg})
                  return 0
              elif show_ci:
-                 console.print("📭 No spiders found.")
+                 console.print("📭 未找到爬虫。")
                  return 0
              else:
                  console.print(Panel(
                      Text.from_markup(
-                         ":envelope_with_arrow: [bold]No spiders found[/bold]\n\n"
-                         "[bold]💡 Make sure:[/bold]\n"
-                         " • Spiders are defined in '[cyan]spiders[/cyan]' module\n"
-                         " • They have a [green]`name`[/green] attribute\n"
-                         " • Modules are properly imported"
+                         ":envelope_with_arrow: [bold]未找到爬虫[/bold]\n\n"
+                         "[bold]💡 确保:[/bold]\n"
+                         " • 爬虫定义于 '[cyan]spiders[/cyan]' 模块\n"
+                         " • 具有 [green]`name`[/green] 属性\n"
+                         " • 模块已正确导入"
                      ),
-                     title="📭 No Spiders Found",
+                     title="📭 未找到爬虫",
                      border_style="yellow",
                      padding=(1, 2)
                  ))
@@ -441,13 +441,13 @@ def main(args):

          # 5. 如果启用 watch 模式,启动监听
          if show_watch:
-             console.print("[bold blue]:eyes: Starting watch mode...[/bold blue]")
+             console.print("[bold blue]:eyes: 启动监听模式...[/bold blue]")
              watch_spiders(project_root, project_package, show_fix)
              return 0  # watch 是长期运行,不返回

          # 6. 开始检查(非 watch 模式)
          if not show_ci and not show_json:
-             console.print(f":mag: [bold]Checking {len(spider_names)} spider(s)...[/bold]\n")
+             console.print(f":mag: [bold]正在检查 {len(spider_names)} 个爬虫...[/bold]\n")

          issues_found = False
          results = []
@@ -458,29 +458,29 @@ def main(args):

              # 检查 name 属性
              if not getattr(cls, "name", None):
-                 issues.append("missing or empty 'name' attribute")
+                 issues.append("缺少或为空的 'name' 属性")
              elif not isinstance(cls.name, str):
-                 issues.append("'name' is not a string")
+                 issues.append("'name' 不是字符串")

              # 检查 start_requests 是否可调用
              if not callable(getattr(cls, "start_requests", None)):
-                 issues.append("missing or non-callable 'start_requests' method")
+                 issues.append("缺少或不可调用的 'start_requests' 方法")

              # 检查 start_urls 类型(不应是字符串)
              if hasattr(cls, "start_urls") and isinstance(cls.start_urls, str):
-                 issues.append("'start_urls' is a string; should be list or tuple")
+                 issues.append("'start_urls' 是字符串;应为列表或元组")

              # 检查 allowed_domains 类型
              if hasattr(cls, "allowed_domains") and isinstance(cls.allowed_domains, str):
-                 issues.append("'allowed_domains' is a string; should be list or tuple")
+                 issues.append("'allowed_domains' 是字符串;应为列表或元组")

              # 实例化并检查 parse 方法
              try:
                  spider = cls.create_instance(None)
                  if not callable(getattr(spider, "parse", None)):
-                     issues.append("no 'parse' method defined (recommended)")
+                     issues.append("未定义 'parse' 方法(推荐)")
              except Exception as e:
-                 issues.append(f"failed to instantiate spider: {e}")
+                 issues.append(f"实例化爬虫失败: {e}")

              # 自动修复(如果启用)
              if issues and show_fix:
@@ -489,14 +489,14 @@ def main(args):
                      fixed, msg = auto_fix_spider_file(cls, file_path)
                      if fixed:
                          if not show_ci and not show_json:
-                             console.print(f"[green]🔧 Auto-fixed {name} → {msg}[/green]")
+                             console.print(f"[green]🔧 已自动修复 {name} → {msg}[/green]")
                          issues = []  # 认为已修复
                      else:
                          if not show_ci and not show_json:
-                             console.print(f"[yellow]⚠️ Could not auto-fix {name}: {msg}[/yellow]")
+                             console.print(f"[yellow]⚠️ 无法自动修复 {name}: {msg}[/yellow]")
                  except Exception as e:
                      if not show_ci and not show_json:
-                         console.print(f"[yellow]⚠️ Failed to locate source file for {name}: {e}[/yellow]")
+                         console.print(f"[yellow]⚠️ 找不到 {name} 的源文件: {e}[/yellow]")

              results.append({
                  "name": name,
@@ -525,26 +525,26 @@ def main(args):

          if show_ci:
              if issues_found:
-                 console.print("❌ Compliance check failed.")
+                 console.print("❌ 合规性检查失败。")
                  for r in results:
                      if r["issues"]:
                          console.print(f" • {r['name']}: {', '.join(r['issues'])}")
              else:
-                 console.print("✅ All spiders compliant.")
+                 console.print("✅ 所有爬虫合规。")
              return 1 if issues_found else 0

          # 9. 默认 rich 输出
          table = Table(
-             title="🔍 Spider Compliance Check Results",
+             title="🔍 爬虫合规性检查结果",
              box=box.ROUNDED,
              show_header=True,
              header_style="bold magenta",
              title_style="bold green"
          )
-         table.add_column("Status", style="bold", width=4)
-         table.add_column("Name", style="cyan")
-         table.add_column("Class", style="green")
-         table.add_column("Issues", style="yellow", overflow="fold")
+         table.add_column("状态", style="bold", width=4)
+         table.add_column("名称", style="cyan")
+         table.add_column("类名", style="green")
+         table.add_column("问题", style="yellow", overflow="fold")

          for res in results:
              if res["issues"]:
@@ -561,29 +561,29 @@ def main(args):

          if issues_found:
              console.print(Panel(
-                 ":warning: [bold red]Some spiders have issues.[/bold red]\nPlease fix them before running.",
-                 title="⚠️ Compliance Check Failed",
+                 ":warning: [bold red]一些爬虫存在问题。[/bold red]\n请在运行前修复这些问题。",
+                 title="⚠️ 合规性检查失败",
                  border_style="red",
                  padding=(1, 2)
              ))
              return 1
          else:
              console.print(Panel(
-                 ":tada: [bold green]All spiders are compliant and well-defined![/bold green]\nReady to crawl! 🕷️🚀",
-                 title="🎉 Check Passed",
+                 ":tada: [bold green]所有爬虫都合规且定义良好![/bold green]\n准备开始爬取! 🕷️🚀",
+                 title="🎉 检查通过",
                  border_style="green",
                  padding=(1, 2)
              ))
              return 0

      except Exception as e:
-         logger.exception("Exception in 'crawlo check'")
+         logger.exception("执行 'crawlo check' 时发生异常")
          if show_json:
              console.print_json(data={"success": False, "error": str(e)})
          elif show_ci:
-             console.print(f"❌ Unexpected error: {e}")
+             console.print(f"❌ 意外错误: {e}")
          else:
-             console.print(f"[bold red]❌ Unexpected error during check:[/bold red] {e}")
+             console.print(f"[bold red]❌ 检查过程中发生意外错误:[/bold red] {e}")
          return 1

crawlo/commands/genspider.py CHANGED
@@ -37,8 +37,8 @@ def _render_template(tmpl_path, context):

  def main(args):
      if len(args) < 2:
-         console.print("[bold red]Error:[/bold red] Usage: [blue]crawlo genspider[/blue] <spider_name> <domain>")
-         console.print("💡 Examples:")
+         console.print("[bold red]错误:[/bold red] 用法: [blue]crawlo genspider[/blue] <爬虫名称> <域名>")
+         console.print("💡 示例:")
          console.print(" [blue]crawlo genspider[/blue] news_spider news.example.com")
          console.print(" [blue]crawlo genspider[/blue] product_spider shop.example.com")
          return 1
@@ -49,28 +49,28 @@ def main(args):
      # 验证爬虫名称
      if not validate_spider_name(spider_name):
          show_error_panel(
-             "Invalid Spider Name",
-             f"Spider name '[cyan]{spider_name}[/cyan]' is invalid.\n"
-             "💡 Spider name should:\n"
-             " • Start with lowercase letter\n"
-             " • Contain only lowercase letters, numbers, and underscores\n"
-             " • Be a valid Python identifier"
+             "无效的爬虫名称",
+             f"爬虫名称 '[cyan]{spider_name}[/cyan]' 无效。\n"
+             "💡 爬虫名称应:\n"
+             " • 以小写字母开头\n"
+             " • 只能包含小写字母、数字和下划线\n"
+             " • 是有效的Python标识符"
          )
          return 1

      # 验证域名格式
      if not is_valid_domain(domain):
          show_error_panel(
-             "Invalid Domain",
-             f"Domain '[cyan]{domain}[/cyan]' format is invalid.\n"
-             "💡 Please provide a valid domain name like 'example.com'"
+             "无效的域名",
+             f"域名 '[cyan]{domain}[/cyan]' 格式无效。\n"
+             "💡 请提供有效的域名,如 'example.com'"
          )
          return 1

      # 验证项目环境
      is_valid, project_package, error_msg = validate_project_environment()
      if not is_valid:
-         show_error_panel("Not a Crawlo Project", error_msg)
+         show_error_panel("Crawlo项目", error_msg)
          return 1

      project_root = get_project_root()
@@ -91,10 +91,10 @@ def main(args):
          if item_classes:
              default_item_class = item_classes[0].__name__
          else:
-             console.print("[yellow]:warning: Warning:[/yellow] No item class found in [cyan]items.py[/cyan], using [green]ExampleItem[/green].")
+             console.print("[yellow]:warning: 警告:[/yellow] [cyan]items.py[/cyan] 中未找到项目类,使用 [green]ExampleItem[/green]")

      except ImportError as e:
-         console.print(f"[yellow]:warning: Warning:[/yellow] Failed to import [cyan]{items_module_path}[/cyan]: {e}")
+         console.print(f"[yellow]:warning: 警告:[/yellow] 导入 [cyan]{items_module_path}[/cyan] 失败: {e}")
          # 仍使用默认 ExampleItem,不中断流程

      # 创建爬虫文件
@@ -104,8 +104,8 @@ def main(args):
      spider_file = spiders_dir / f'{spider_name}.py'
      if spider_file.exists():
          show_error_panel(
-             "Spider Already Exists",
-             f"Spider '[cyan]{spider_name}[/cyan]' already exists at\n[green]{spider_file}[/green]"
+             "爬虫已存在",
+             f"爬虫 '[cyan]{spider_name}[/cyan]' 已存在于\n[green]{spider_file}[/green]"
          )
          return 1

@@ -113,8 +113,8 @@ def main(args):
      tmpl_path = TEMPLATES_DIR / 'spider' / 'spider.py.tmpl'
      if not tmpl_path.exists():
          show_error_panel(
-             "Template Not Found",
-             f"Template file not found at [cyan]{tmpl_path}[/cyan]"
+             "模板未找到",
+             f"模板文件未找到于 [cyan]{tmpl_path}[/cyan]"
          )
          return 1

@@ -134,11 +134,11 @@ def main(args):
          with open(spider_file, 'w', encoding='utf-8') as f:
              f.write(content)

-         console.print(f":white_check_mark: [green]Spider '[bold]{spider_name}[/bold]' created successfully![/green]")
-         console.print(f" → Location: [cyan]{spider_file}[/cyan]")
-         console.print(f" → Class: [yellow]{class_name}[/yellow]")
-         console.print(f" → Domain: [blue]{domain}[/blue]")
-         console.print("\n[bold]Next steps:[/bold]")
+         console.print(f":white_check_mark: [green]爬虫 '[bold]{spider_name}[/bold]' 创建成功![/green]")
+         console.print(f" → 位置: [cyan]{spider_file}[/cyan]")
+         console.print(f" → 类名: [yellow]{class_name}[/yellow]")
+         console.print(f" → 域名: [blue]{domain}[/blue]")
+         console.print("\n[bold]下一步操作:[/bold]")
          console.print(f" [blue]crawlo run[/blue] {spider_name}")
          console.print(f" [blue]crawlo check[/blue] {spider_name}")

@@ -146,7 +146,7 @@ def main(args):

      except Exception as e:
          show_error_panel(
-             "Creation Failed",
-             f"Failed to create spider: {e}"
+             "创建失败",
+             f"创建爬虫失败: {e}"
          )
          return 1
crawlo/commands/help.py CHANGED
@@ -137,7 +137,7 @@ def show_help():
      console.print()

      # 显示更多信息
-     console.print("[bold green]更多信息:[/bold green]")
-     console.print(" 文档: https://crawlo.readthedocs.io/")
-     console.print(" 源码: https://github.com/crawl-coder/Crawlo")
-     console.print(" 问题: https://github.com/crawl-coder/Crawlo/issues")
+     # console.print("[bold green]更多信息:[/bold green]")
+     # console.print(" 文档: https://crawlo.readthedocs.io/")
+     # console.print(" 源码: https://github.com/crawl-coder/Crawlo")
+     # console.print(" 问题: https://github.com/crawl-coder/Crawlo/issues")
crawlo/commands/list.py CHANGED
@@ -34,9 +34,9 @@ def main(args):
      filtered_args = [arg for arg in args if not arg.startswith('--')]
      if filtered_args:
          if show_json:
-             console.print_json(data={"success": False, "error": "Usage: crawlo list [--json]"})
+             console.print_json(data={"success": False, "error": "用法: crawlo list [--json]"})
          else:
-             console.print("[bold red]❌ Error:[/bold red] Usage: [blue]crawlo list[/blue] [--json]")
+             console.print("[bold red]❌ 错误:[/bold red] 用法: [blue]crawlo list[/blue] [--json]")
          return 1

      try:
@@ -46,7 +46,7 @@ def main(args):
              if show_json:
                  console.print_json(data={"success": False, "error": error_msg})
              else:
-                 show_error_panel("Not a Crawlo Project", error_msg)
+                 show_error_panel("Crawlo项目", error_msg)
              return 1

          # 初始化 CrawlerProcess 并加载爬虫模块
@@ -60,18 +60,18 @@ def main(args):
                  console.print_json(data={
                      "success": True,
                      "spiders": [],
-                     "message": "No spiders found in project"
+                     "message": "项目中未找到爬虫"
                  })
              else:
                  console.print(Panel(
                      Text.from_markup(
-                         ":envelope_with_arrow: [bold]No spiders found[/bold] in '[cyan]spiders/[/cyan]' directory.\n\n"
-                         "[bold]💡 Make sure:[/bold]\n"
-                         " • Spider classes inherit from [blue]`crawlo.spider.Spider`[/blue]\n"
-                         " • Each spider has a [green]`name`[/green] attribute\n"
-                         " • Spiders are imported in [cyan]`spiders/__init__.py`[/cyan] (if using package)"
+                         ":envelope_with_arrow: [bold]未找到爬虫[/bold] '[cyan]spiders/[/cyan]' 目录。\n\n"
+                         "[bold]💡 确保:[/bold]\n"
+                         " • 爬虫类继承自 [blue]`crawlo.spider.Spider`[/blue]\n"
+                         " • 每个爬虫都有 [green]`name`[/green] 属性\n"
+                         " • 爬虫已在 [cyan]`spiders/__init__.py`[/cyan] 中导入 (如果使用包)"
                      ),
-                     title="📭 No Spiders Found",
+                     title="📭 未找到爬虫",
                      border_style="yellow",
                      padding=(1, 2)
                  ))
@@ -108,18 +108,18 @@ def main(args):

          # 表格输出
          table = Table(
-             title=f"📋 Found {len(spider_names)} spider(s)",
+             title=f"📋 找到 {len(spider_names)} 个爬虫",
              box=box.ROUNDED,
              show_header=True,
              header_style="bold magenta",
              title_style="bold green"
          )
-         table.add_column("Name", style="cyan", no_wrap=True)
-         table.add_column("Class", style="green")
-         table.add_column("Module", style="dim")
-         table.add_column("URLs", style="blue", justify="center")
-         table.add_column("Domains", style="yellow")
-         table.add_column("Custom Settings", style="magenta", justify="center")
+         table.add_column("名称", style="cyan", no_wrap=True)
+         table.add_column("类名", style="green")
+         table.add_column("模块", style="dim")
+         table.add_column("URL数", style="blue", justify="center")
+         table.add_column("域名", style="yellow")
+         table.add_column("自定义设置", style="magenta", justify="center")

          for info in spider_info:
              domains_display = ", ".join(info["allowed_domains"][:2])  # 显示前2个域名
@@ -140,10 +140,10 @@ def main(args):
          console.print(table)

          # 显示使用提示
-         console.print("\n[bold]🚀 Next steps:[/bold]")
-         console.print(" [blue]crawlo run[/blue] <spider_name> # Run a specific spider")
-         console.print(" [blue]crawlo run[/blue] all # Run all spiders")
-         console.print(" [blue]crawlo check[/blue] <spider_name> # Check spider validity")
+         console.print("\n[bold]🚀 下一步操作:[/bold]")
+         console.print(" [blue]crawlo run[/blue] <爬虫名称> # 运行指定爬虫")
+         console.print(" [blue]crawlo run[/blue] all # 运行所有爬虫")
+         console.print(" [blue]crawlo check[/blue] <爬虫名称> # 检查爬虫有效性")

          return 0

@@ -151,6 +151,6 @@ def main(args):
          if show_json:
              console.print_json(data={"success": False, "error": str(e)})
          else:
-             console.print(f"[bold red]❌ Unexpected error:[/bold red] {e}")
-             logger.exception("Exception during 'crawlo list'")
+             console.print(f"[bold red]❌ 意外错误:[/bold red] {e}")
+             logger.exception("执行 'crawlo list' 时发生异常")
          return 1