PyPI - crawlo - Versions diffs - 1.2.5__py3-none-any.whl → 1.2.7__py3-none-any.whl - Mend

crawlo: 1.2.5 (py3-none-any.whl) → 1.2.7 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of crawlo might be problematic. Click here for more details.

Files changed (209)

crawlo/__init__.py +61 -61
crawlo/__version__.py +1 -1
crawlo/cleaners/__init__.py +60 -60
crawlo/cleaners/data_formatter.py +225 -225
crawlo/cleaners/encoding_converter.py +125 -125
crawlo/cleaners/text_cleaner.py +232 -232
crawlo/cli.py +75 -88
crawlo/commands/__init__.py +14 -14
crawlo/commands/check.py +594 -594
crawlo/commands/genspider.py +151 -151
crawlo/commands/help.py +138 -144
crawlo/commands/list.py +155 -155
crawlo/commands/run.py +323 -323
crawlo/commands/startproject.py +436 -436
crawlo/commands/stats.py +187 -187
crawlo/commands/utils.py +186 -186
crawlo/config.py +312 -312
crawlo/config_validator.py +251 -251
crawlo/core/__init__.py +2 -2
crawlo/core/engine.py +365 -354
crawlo/core/processor.py +40 -40
crawlo/core/scheduler.py +251 -143
crawlo/crawler.py +1099 -1110
crawlo/data/__init__.py +5 -5
crawlo/data/user_agents.py +107 -107
crawlo/downloader/__init__.py +266 -266
crawlo/downloader/aiohttp_downloader.py +228 -221
crawlo/downloader/cffi_downloader.py +256 -256
crawlo/downloader/httpx_downloader.py +259 -259
crawlo/downloader/hybrid_downloader.py +212 -212
crawlo/downloader/playwright_downloader.py +402 -402
crawlo/downloader/selenium_downloader.py +472 -472
crawlo/event.py +11 -11
crawlo/exceptions.py +81 -81
crawlo/extension/__init__.py +39 -38
crawlo/extension/health_check.py +141 -141
crawlo/extension/log_interval.py +57 -57
crawlo/extension/log_stats.py +81 -81
crawlo/extension/logging_extension.py +43 -43
crawlo/extension/memory_monitor.py +104 -104
crawlo/extension/performance_profiler.py +133 -133
crawlo/extension/request_recorder.py +107 -107
crawlo/filters/__init__.py +154 -154
crawlo/filters/aioredis_filter.py +234 -281
crawlo/filters/memory_filter.py +269 -269
crawlo/items/__init__.py +23 -23
crawlo/items/base.py +21 -21
crawlo/items/fields.py +52 -52
crawlo/items/items.py +104 -104
crawlo/middleware/__init__.py +21 -21
crawlo/middleware/default_header.py +131 -131
crawlo/middleware/download_delay.py +104 -104
crawlo/middleware/middleware_manager.py +136 -135
crawlo/middleware/offsite.py +114 -114
crawlo/middleware/proxy.py +367 -367
crawlo/middleware/request_ignore.py +86 -86
crawlo/middleware/response_code.py +163 -163
crawlo/middleware/response_filter.py +136 -136
crawlo/middleware/retry.py +124 -124
crawlo/mode_manager.py +211 -211
crawlo/network/__init__.py +21 -21
crawlo/network/request.py +338 -338
crawlo/network/response.py +359 -359
crawlo/pipelines/__init__.py +21 -21
crawlo/pipelines/bloom_dedup_pipeline.py +156 -156
crawlo/pipelines/console_pipeline.py +39 -39
crawlo/pipelines/csv_pipeline.py +316 -316
crawlo/pipelines/database_dedup_pipeline.py +222 -222
crawlo/pipelines/json_pipeline.py +218 -218
crawlo/pipelines/memory_dedup_pipeline.py +115 -115
crawlo/pipelines/mongo_pipeline.py +131 -131
crawlo/pipelines/mysql_pipeline.py +317 -317
crawlo/pipelines/pipeline_manager.py +62 -61
crawlo/pipelines/redis_dedup_pipeline.py +166 -165
crawlo/project.py +314 -279
crawlo/queue/pqueue.py +37 -37
crawlo/queue/queue_manager.py +377 -337
crawlo/queue/redis_priority_queue.py +306 -299
crawlo/settings/__init__.py +7 -7
crawlo/settings/default_settings.py +219 -217
crawlo/settings/setting_manager.py +122 -122
crawlo/spider/__init__.py +639 -639
crawlo/stats_collector.py +59 -59
crawlo/subscriber.py +129 -129
crawlo/task_manager.py +30 -30
crawlo/templates/crawlo.cfg.tmpl +10 -10
crawlo/templates/project/__init__.py.tmpl +3 -3
crawlo/templates/project/items.py.tmpl +17 -17
crawlo/templates/project/middlewares.py.tmpl +118 -118
crawlo/templates/project/pipelines.py.tmpl +96 -96
crawlo/templates/project/settings.py.tmpl +288 -324
crawlo/templates/project/settings_distributed.py.tmpl +157 -154
crawlo/templates/project/settings_gentle.py.tmpl +101 -128
crawlo/templates/project/settings_high_performance.py.tmpl +135 -150
crawlo/templates/project/settings_simple.py.tmpl +99 -103
crawlo/templates/project/spiders/__init__.py.tmpl +5 -5
crawlo/templates/run.py.tmpl +45 -47
crawlo/templates/spider/spider.py.tmpl +143 -143
crawlo/tools/__init__.py +182 -182
crawlo/tools/anti_crawler.py +268 -268
crawlo/tools/authenticated_proxy.py +240 -240
crawlo/tools/data_validator.py +180 -180
crawlo/tools/date_tools.py +35 -35
crawlo/tools/distributed_coordinator.py +386 -386
crawlo/tools/retry_mechanism.py +220 -220
crawlo/tools/scenario_adapter.py +262 -262
crawlo/utils/__init__.py +35 -35
crawlo/utils/batch_processor.py +259 -259
crawlo/utils/controlled_spider_mixin.py +439 -439
crawlo/utils/date_tools.py +290 -290
crawlo/utils/db_helper.py +343 -343
crawlo/utils/enhanced_error_handler.py +356 -356
crawlo/utils/env_config.py +143 -106
crawlo/utils/error_handler.py +123 -123
crawlo/utils/func_tools.py +82 -82
crawlo/utils/large_scale_config.py +286 -286
crawlo/utils/large_scale_helper.py +344 -344
crawlo/utils/log.py +128 -128
crawlo/utils/performance_monitor.py +285 -285
crawlo/utils/queue_helper.py +175 -175
crawlo/utils/redis_connection_pool.py +351 -334
crawlo/utils/redis_key_validator.py +198 -198
crawlo/utils/request.py +267 -267
crawlo/utils/request_serializer.py +218 -218
crawlo/utils/spider_loader.py +61 -61
crawlo/utils/system.py +11 -11
crawlo/utils/tools.py +4 -4
crawlo/utils/url.py +39 -39
{crawlo-1.2.5.dist-info → crawlo-1.2.7.dist-info}/METADATA +764 -764
crawlo-1.2.7.dist-info/RECORD +209 -0
examples/__init__.py +7 -7
tests/DOUBLE_CRAWLO_PREFIX_FIX_REPORT.md +81 -81
tests/__init__.py +7 -7
tests/advanced_tools_example.py +275 -275
tests/authenticated_proxy_example.py +236 -236
tests/cleaners_example.py +160 -160
tests/config_validation_demo.py +102 -102
tests/controlled_spider_example.py +205 -205
tests/date_tools_example.py +180 -180
tests/dynamic_loading_example.py +523 -523
tests/dynamic_loading_test.py +104 -104
tests/env_config_example.py +133 -133
tests/error_handling_example.py +171 -171
tests/redis_key_validation_demo.py +130 -130
tests/response_improvements_example.py +144 -144
tests/test_advanced_tools.py +148 -148
tests/test_all_redis_key_configs.py +145 -145
tests/test_authenticated_proxy.py +141 -141
tests/test_cleaners.py +54 -54
tests/test_comprehensive.py +146 -146
tests/test_config_consistency.py +81 -0
tests/test_config_validator.py +193 -193
tests/test_crawlo_proxy_integration.py +172 -172
tests/test_date_tools.py +123 -123
tests/test_default_header_middleware.py +158 -158
tests/test_double_crawlo_fix.py +207 -207
tests/test_double_crawlo_fix_simple.py +124 -124
tests/test_download_delay_middleware.py +221 -221
tests/test_downloader_proxy_compatibility.py +268 -268
tests/test_dynamic_downloaders_proxy.py +124 -124
tests/test_dynamic_proxy.py +92 -92
tests/test_dynamic_proxy_config.py +146 -146
tests/test_dynamic_proxy_real.py +109 -109
tests/test_edge_cases.py +303 -303
tests/test_enhanced_error_handler.py +270 -270
tests/test_env_config.py +121 -121
tests/test_error_handler_compatibility.py +112 -112
tests/test_final_validation.py +153 -153
tests/test_framework_env_usage.py +103 -103
tests/test_integration.py +356 -356
tests/test_item_dedup_redis_key.py +122 -122
tests/test_mode_consistency.py +52 -0
tests/test_offsite_middleware.py +221 -221
tests/test_parsel.py +29 -29
tests/test_performance.py +327 -327
tests/test_proxy_api.py +264 -264
tests/test_proxy_health_check.py +32 -32
tests/test_proxy_middleware.py +121 -121
tests/test_proxy_middleware_enhanced.py +216 -216
tests/test_proxy_middleware_integration.py +136 -136
tests/test_proxy_providers.py +56 -56
tests/test_proxy_stats.py +19 -19
tests/test_proxy_strategies.py +59 -59
tests/test_queue_manager_double_crawlo.py +173 -173
tests/test_queue_manager_redis_key.py +176 -176
tests/test_real_scenario_proxy.py +195 -195
tests/test_redis_config.py +28 -28
tests/test_redis_connection_pool.py +294 -294
tests/test_redis_key_naming.py +181 -181
tests/test_redis_key_validator.py +123 -123
tests/test_redis_queue.py +224 -224
tests/test_request_ignore_middleware.py +182 -182
tests/test_request_serialization.py +70 -70
tests/test_response_code_middleware.py +349 -349
tests/test_response_filter_middleware.py +427 -427
tests/test_response_improvements.py +152 -152
tests/test_retry_middleware.py +241 -241
tests/test_scheduler.py +252 -241
tests/test_scheduler_config_update.py +134 -0
tests/test_simple_response.py +61 -61
tests/test_telecom_spider_redis_key.py +205 -205
tests/test_template_content.py +87 -87
tests/test_template_redis_key.py +134 -134
tests/test_tools.py +153 -153
tests/tools_example.py +257 -257
crawlo-1.2.5.dist-info/RECORD +0 -206
{crawlo-1.2.5.dist-info → crawlo-1.2.7.dist-info}/WHEEL +0 -0
{crawlo-1.2.5.dist-info → crawlo-1.2.7.dist-info}/entry_points.txt +0 -0
{crawlo-1.2.5.dist-info → crawlo-1.2.7.dist-info}/top_level.txt +0 -0

crawlo/commands/check.py CHANGED Viewed

@@ -1,595 +1,595 @@
-#!/usr/bin/python
-# -*- coding: UTF-8 -*-
-"""
-# @Time    : 2025-08-31 22:35
-# @Author  : crawl-coder
-# @Desc    : 命令行入口：crawlo check，检查所有爬虫定义是否合规。
-"""
-import sys
-import ast
-import astor
-import re
-import time
-from pathlib import Path
-import configparser
-from importlib import import_module
-from rich.console import Console
-from rich.panel import Panel
-from rich.table import Table
-from rich.text import Text
-from rich import box
-from watchdog.observers import Observer
-from watchdog.events import FileSystemEventHandler
-from crawlo.crawler import CrawlerProcess
-from crawlo.utils.log import get_logger
-logger = get_logger(__name__)
-console = Console()
-def get_project_root():
-    """
-    从当前目录向上查找 crawlo.cfg，确定项目根目录
-    """
-    current = Path.cwd()
-    for _ in range(10):
-        cfg = current / "crawlo.cfg"
-        if cfg.exists():
-            return current
-        if current == current.parent:
-            break
-        current = current.parent
-    return None
-def auto_fix_spider_file(spider_cls, file_path: Path):
-    """自动修复 spider 文件中的常见问题"""
-    try:
-        with open(file_path, "r", encoding="utf-8") as f:
-            source = f.read()
-        fixed = False
-        tree = ast.parse(source)
-        # 查找 Spider 类定义
-        class_node = None
-        for node in ast.walk(tree):
-            if isinstance(node, ast.ClassDef) and node.name == spider_cls.__name__:
-                class_node = node
-                break
-        if not class_node:
-            return False, "在文件中找不到类定义。"
-        # 1. 修复 name 为空或缺失
-        name_assign = None
-        for node in class_node.body:
-            if isinstance(node, ast.Assign):
-                for target in node.targets:
-                    if isinstance(target, ast.Name) and target.id == "name":
-                        name_assign = node
-                        break
-        if not name_assign or (
-            isinstance(name_assign.value, ast.Constant) and not name_assign.value.value
-        ):
-            # 生成默认 name：类名转 snake_case
-            default_name = re.sub(r'(?<!^)(?=[A-Z])', '_', spider_cls.__name__).lower().replace("_spider", "")
-            new_assign = ast.Assign(
-                targets=[ast.Name(id="name", ctx=ast.Store())],
-                value=ast.Constant(value=default_name)
-            )
-            if name_assign:
-                index = class_node.body.index(name_assign)
-                class_node.body[index] = new_assign
-            else:
-                class_node.body.insert(0, new_assign)
-            fixed = True
-        # 2. 修复 start_urls 是字符串
-        start_urls_assign = None
-        for node in class_node.body:
-            if isinstance(node, ast.Assign):
-                for target in node.targets:
-                    if isinstance(target, ast.Name) and target.id == "start_urls":
-                        start_urls_assign = node
-                        break
-        if start_urls_assign and isinstance(start_urls_assign.value, ast.Constant) and isinstance(start_urls_assign.value.value, str):
-            new_value = ast.List(elts=[ast.Constant(value=start_urls_assign.value.value)], ctx=ast.Load())
-            start_urls_assign.value = new_value
-            fixed = True
-        # 3. 修复缺少 parse 方法
-        has_parse = any(
-            isinstance(node, ast.FunctionDef) and node.name == "parse"
-            for node in class_node.body
-        )
-        if not has_parse:
-            parse_method = ast.FunctionDef(
-                name="parse",
-                args=ast.arguments(
-                    posonlyargs=[],
-                    args=[ast.arg(arg="self"), ast.arg(arg="response")],
-                    kwonlyargs=[],
-                    kw_defaults=[],
-                    defaults=[],
-                    vararg=None,
-                    kwarg=None
-                ),
-                body=[
-                    ast.Expr(value=ast.Constant(value="默认 parse 方法，返回 item 或继续请求")),
-                    ast.Pass()
-                ],
-                decorator_list=[],
-                returns=None
-            )
-            class_node.body.append(parse_method)
-            fixed = True
-        # 4. 修复 allowed_domains 是字符串
-        allowed_domains_assign = None
-        for node in class_node.body:
-            if isinstance(node, ast.Assign):
-                for target in node.targets:
-                    if isinstance(target, ast.Name) and target.id == "allowed_domains":
-                        allowed_domains_assign = node
-                        break
-        if allowed_domains_assign and isinstance(allowed_domains_assign.value, ast.Constant) and isinstance(allowed_domains_assign.value.value, str):
-            new_value = ast.List(elts=[ast.Constant(value=allowed_domains_assign.value.value)], ctx=ast.Load())
-            allowed_domains_assign.value = new_value
-            fixed = True
-        # 5. 修复缺失 custom_settings
-        has_custom_settings = any(
-            isinstance(node, ast.Assign) and
-            any(isinstance(t, ast.Name) and t.id == "custom_settings" for t in node.targets)
-            for node in class_node.body
-        )
-        if not has_custom_settings:
-            new_assign = ast.Assign(
-                targets=[ast.Name(id="custom_settings", ctx=ast.Store())],
-                value=ast.Dict(keys=[], values=[])
-            )
-            # 插入在 name 之后
-            insert_index = 1
-            for i, node in enumerate(class_node.body):
-                if isinstance(node, ast.Assign) and any(
-                    isinstance(t, ast.Name) and t.id == "name" for t in node.targets
-                ):
-                    insert_index = i + 1
-                    break
-            class_node.body.insert(insert_index, new_assign)
-            fixed = True
-        # 6. 修复缺失 start_requests 方法
-        has_start_requests = any(
-            isinstance(node, ast.FunctionDef) and node.name == "start_requests"
-            for node in class_node.body
-        )
-        if not has_start_requests:
-            start_requests_method = ast.FunctionDef(
-                name="start_requests",
-                args=ast.arguments(
-                    posonlyargs=[],
-                    args=[ast.arg(arg="self")],
-                    kwonlyargs=[],
-                    kw_defaults=[],
-                    defaults=[],
-                    vararg=None,
-                    kwarg=None
-                ),
-                body=[
-                    ast.Expr(value=ast.Constant(value="默认 start_requests，从 start_urls 生成请求")),
-                    ast.For(
-                        target=ast.Name(id="url", ctx=ast.Store()),
-                        iter=ast.Attribute(value=ast.Name(id="self", ctx=ast.Load()), attr="start_urls", ctx=ast.Load()),
-                        body=[
-                            ast.Expr(
-                                value=ast.Call(
-                                    func=ast.Attribute(value=ast.Name(id="self", ctx=ast.Load()), attr="make_request", ctx=ast.Load()),
-                                    args=[ast.Name(id="url", ctx=ast.Load())],
-                                    keywords=[]
-                                )
-                            )
-                        ],
-                        orelse=[]
-                    )
-                ],
-                decorator_list=[],
-                returns=None
-            )
-            # 插入在 custom_settings 或 name 之后，parse 之前
-            insert_index = 2
-            for i, node in enumerate(class_node.body):
-                if isinstance(node, ast.FunctionDef) and node.name == "parse":
-                    insert_index = i
-                    break
-                elif isinstance(node, ast.Assign) and any(
-                    isinstance(t, ast.Name) and t.id in ("name", "custom_settings") for t in node.targets
-                ):
-                    insert_index = i + 1
-            class_node.body.insert(insert_index, start_requests_method)
-            fixed = True
-        if fixed:
-            fixed_source = astor.to_source(tree)
-            with open(file_path, "w", encoding="utf-8") as f:
-                f.write(fixed_source)
-            return True, "文件自动修复成功。"
-        else:
-            return False, "未找到可修复的问题。"
-    except Exception as e:
-        return False, f"自动修复失败: {e}"
-class SpiderChangeHandler(FileSystemEventHandler):
-    def __init__(self, project_root, spider_modules, show_fix=False, console=None):
-        self.project_root = project_root
-        self.spider_modules = spider_modules
-        self.show_fix = show_fix
-        self.console = console or Console()
-    def on_modified(self, event):
-        if event.is_directory:
-            return
-        if event.src_path.endswith(".py") and "spiders" in event.src_path:
-            file_path = Path(event.src_path)
-            spider_name = file_path.stem
-            self.console.print(f"\n:eyes: [bold blue]检测到变更[/bold blue] [cyan]{file_path}[/cyan]")
-            self.check_and_fix_spider(spider_name)
-    def check_and_fix_spider(self, spider_name):
-        try:
-            process = CrawlerProcess(spider_modules=self.spider_modules)
-            if spider_name not in process.get_spider_names():
-                self.console.print(f"[yellow]⚠️  {spider_name} 不是已注册的爬虫。[/yellow]")
-                return
-            cls = process.get_spider_class(spider_name)
-            issues = []
-            # 简化检查
-            if not getattr(cls, "name", None):
-                issues.append("缺少或为空的 'name' 属性")
-            if not callable(getattr(cls, "start_requests", None)):
-                issues.append("缺少 'start_requests' 方法")
-            if hasattr(cls, "start_urls") and isinstance(cls.start_urls, str):
-                issues.append("'start_urls' 是字符串")
-            if hasattr(cls, "allowed_domains") and isinstance(cls.allowed_domains, str):
-                issues.append("'allowed_domains' 是字符串")
-            try:
-                spider = cls.create_instance(None)
-                if not callable(getattr(spider, "parse", None)):
-                    issues.append("缺少 'parse' 方法")
-            except Exception:
-                issues.append("实例化失败")
-            if issues:
-                self.console.print(f"[red]❌ {spider_name} 存在问题:[/red]")
-                for issue in issues:
-                    self.console.print(f"  • {issue}")
-                if self.show_fix:
-                    file_path = Path(cls.__file__)
-                    fixed, msg = auto_fix_spider_file(cls, file_path)
-                    if fixed:
-                        self.console.print(f"[green]✅ 自动修复: {msg}[/green]")
-                    else:
-                        self.console.print(f"[yellow]⚠️  无法修复: {msg}[/yellow]")
-            else:
-                self.console.print(f"[green]✅ {spider_name} 合规。[/green]")
-        except Exception as e:
-            self.console.print(f"[red]❌ 检查 {spider_name} 时出错: {e}[/red]")
-def watch_spiders(project_root: Path, project_package: str, show_fix: bool):
-    """监听 spiders 目录变化并自动检查"""
-    spider_path = project_root / project_package / "spiders"
-    if not spider_path.exists():
-        console.print(f"[bold red]❌ Spider 目录未找到:[/bold red] {spider_path}")
-        return
-    spider_modules = [f"{project_package}.spiders"]
-    event_handler = SpiderChangeHandler(project_root, spider_modules, show_fix, console)
-    observer = Observer()
-    observer.schedule(event_handler, str(spider_path), recursive=False)
-    console.print(Panel(
-        f":eyes: [bold blue]监听[/bold blue] [cyan]{spider_path}[/cyan] 中的变更\n"
-        "编辑任何爬虫文件以触发自动检查...",
-        title="🚀 已启动监听模式",
-        border_style="blue"
-    ))
-    observer.start()
-    try:
-        while True:
-            time.sleep(1)
-    except KeyboardInterrupt:
-        console.print("\n[bold red]🛑 监听模式已停止。[/bold red]")
-        observer.stop()
-    observer.join()
-def main(args):
-    """
-    主函数：检查所有爬虫定义的合规性
-    用法:
-        crawlo check
-        crawlo check --fix
-        crawlo check --ci
-        crawlo check --json
-        crawlo check --watch
-    """
-    show_fix = "--fix" in args or "-f" in args
-    show_ci = "--ci" in args
-    show_json = "--json" in args
-    show_watch = "--watch" in args
-    valid_args = {"--fix", "-f", "--ci", "--json", "--watch"}
-    if any(arg not in valid_args for arg in args):
-        console.print("[bold red]❌ 错误:[/bold red] 用法: [blue]crawlo check[/blue] [--fix] [--ci] [--json] [--watch]")
-        return 1
-    try:
-        # 1. 查找项目根目录
-        project_root = get_project_root()
-        if not project_root:
-            msg = ":cross_mark: [bold red]找不到 'crawlo.cfg'[/bold red]\n💡 请在项目目录中运行此命令。"
-            if show_json:
-                console.print_json(data={"success": False, "error": "未找到项目根目录"})
-                return 1
-            elif show_ci:
-                console.print("❌ 未找到项目根目录。缺少 crawlo.cfg。")
-                return 1
-            else:
-                console.print(Panel(
-                    Text.from_markup(msg),
-                    title="❌ 非Crawlo项目",
-                    border_style="red",
-                    padding=(1, 2)
-                ))
-                return 1
-        project_root_str = str(project_root)
-        if project_root_str not in sys.path:
-            sys.path.insert(0, project_root_str)
-        # 2. 读取 crawlo.cfg
-        cfg_file = project_root / "crawlo.cfg"
-        if not cfg_file.exists():
-            msg = f"配置文件未找到: {cfg_file}"
-            if show_json:
-                console.print_json(data={"success": False, "error": msg})
-                return 1
-            elif show_ci:
-                console.print(f"❌ {msg}")
-                return 1
-            else:
-                console.print(Panel(msg, title="❌ 缺少配置文件", border_style="red"))
-                return 1
-        config = configparser.ConfigParser()
-        config.read(cfg_file, encoding="utf-8")
-        if not config.has_section("settings") or not config.has_option("settings", "default"):
-            msg = "crawlo.cfg 中缺少 [settings] 部分或 'default' 选项"
-            if show_json:
-                console.print_json(data={"success": False, "error": msg})
-                return 1
-            elif show_ci:
-                console.print(f"❌ {msg}")
-                return 1
-            else:
-                console.print(Panel(msg, title="❌ 无效配置", border_style="red"))
-                return 1
-        settings_module = config.get("settings", "default")
-        project_package = settings_module.split(".")[0]
-        # 3. 确保项目包可导入
-        try:
-            import_module(project_package)
-        except ImportError as e:
-            msg = f"导入项目包 '{project_package}' 失败: {e}"
-            if show_json:
-                console.print_json(data={"success": False, "error": msg})
-                return 1
-            elif show_ci:
-                console.print(f"❌ {msg}")
-                return 1
-            else:
-                console.print(Panel(msg, title="❌ 导入错误", border_style="red"))
-                return 1
-        # 4. 加载爬虫
-        spider_modules = [f"{project_package}.spiders"]
-        process = CrawlerProcess(spider_modules=spider_modules)
-        spider_names = process.get_spider_names()
-        if not spider_names:
-            msg = "未找到爬虫。"
-            if show_json:
-                console.print_json(data={"success": True, "warning": msg})
-                return 0
-            elif show_ci:
-                console.print("📭 未找到爬虫。")
-                return 0
-            else:
-                console.print(Panel(
-                    Text.from_markup(
-                        ":envelope_with_arrow: [bold]未找到爬虫[/bold]\n\n"
-                        "[bold]💡 确保:[/bold]\n"
-                        "  • 爬虫定义于 '[cyan]spiders[/cyan]' 模块\n"
-                        "  • 具有 [green]`name`[/green] 属性\n"
-                        "  • 模块已正确导入"
-                    ),
-                    title="📭 未找到爬虫",
-                    border_style="yellow",
-                    padding=(1, 2)
-                ))
-                return 0
-        # 5. 如果启用 watch 模式，启动监听
-        if show_watch:
-            console.print("[bold blue]:eyes: 启动监听模式...[/bold blue]")
-            watch_spiders(project_root, project_package, show_fix)
-            return 0  # watch 是长期运行，不返回
-        # 6. 开始检查（非 watch 模式）
-        if not show_ci and not show_json:
-            console.print(f":mag: [bold]正在检查 {len(spider_names)} 个爬虫...[/bold]\n")
-        issues_found = False
-        results = []
-        for name in sorted(spider_names):
-            cls = process.get_spider_class(name)
-            issues = []
-            # 检查 name 属性
-            if not getattr(cls, "name", None):
-                issues.append("缺少或为空的 'name' 属性")
-            elif not isinstance(cls.name, str):
-                issues.append("'name' 不是字符串")
-            # 检查 start_requests 是否可调用
-            if not callable(getattr(cls, "start_requests", None)):
-                issues.append("缺少或不可调用的 'start_requests' 方法")
-            # 检查 start_urls 类型（不应是字符串）
-            if hasattr(cls, "start_urls") and isinstance(cls.start_urls, str):
-                issues.append("'start_urls' 是字符串；应为列表或元组")
-            # 检查 allowed_domains 类型
-            if hasattr(cls, "allowed_domains") and isinstance(cls.allowed_domains, str):
-                issues.append("'allowed_domains' 是字符串；应为列表或元组")
-            # 实例化并检查 parse 方法
-            try:
-                spider = cls.create_instance(None)
-                if not callable(getattr(spider, "parse", None)):
-                    issues.append("未定义 'parse' 方法（推荐）")
-            except Exception as e:
-                issues.append(f"实例化爬虫失败: {e}")
-            # 自动修复（如果启用）
-            if issues and show_fix:
-                try:
-                    file_path = Path(cls.__file__)
-                    fixed, msg = auto_fix_spider_file(cls, file_path)
-                    if fixed:
-                        if not show_ci and not show_json:
-                            console.print(f"[green]🔧 已自动修复 {name} → {msg}[/green]")
-                        issues = []  # 认为已修复
-                    else:
-                        if not show_ci and not show_json:
-                            console.print(f"[yellow]⚠️  无法自动修复 {name}: {msg}[/yellow]")
-                except Exception as e:
-                    if not show_ci and not show_json:
-                        console.print(f"[yellow]⚠️  找不到 {name} 的源文件: {e}[/yellow]")
-            results.append({
-                "name": name,
-                "class": cls.__name__,
-                "file": getattr(cls, "__file__", "unknown"),
-                "issues": issues
-            })
-            if issues:
-                issues_found = True
-        # 7. 生成报告数据
-        report = {
-            "success": not issues_found,
-            "total_spiders": len(spider_names),
-            "issues": [
-                {"name": r["name"], "class": r["class"], "file": r["file"], "problems": r["issues"]}
-                for r in results if r["issues"]
-            ]
-        }
-        # 8. 输出（根据模式）
-        if show_json:
-            console.print_json(data=report)
-            return 1 if issues_found else 0
-        if show_ci:
-            if issues_found:
-                console.print("❌ 合规性检查失败。")
-                for r in results:
-                    if r["issues"]:
-                        console.print(f"  • {r['name']}: {', '.join(r['issues'])}")
-            else:
-                console.print("✅ 所有爬虫合规。")
-            return 1 if issues_found else 0
-        # 9. 默认 rich 输出
-        table = Table(
-            title="🔍 爬虫合规性检查结果",
-            box=box.ROUNDED,
-            show_header=True,
-            header_style="bold magenta",
-            title_style="bold green"
-        )
-        table.add_column("状态", style="bold", width=4)
-        table.add_column("名称", style="cyan")
-        table.add_column("类名", style="green")
-        table.add_column("问题", style="yellow", overflow="fold")
-        for res in results:
-            if res["issues"]:
-                status = "[red]❌[/red]"
-                issues_text = "\n".join(f"• {issue}" for issue in res["issues"])
-            else:
-                status = "[green]✅[/green]"
-                issues_text = "—"
-            table.add_row(status, res["name"], res["class"], issues_text)
-        console.print(table)
-        console.print()
-        if issues_found:
-            console.print(Panel(
-                ":warning: [bold red]一些爬虫存在问题。[/bold red]\n请在运行前修复这些问题。",
-                title="⚠️  合规性检查失败",
-                border_style="red",
-                padding=(1, 2)
-            ))
-            return 1
-        else:
-            console.print(Panel(
-                ":tada: [bold green]所有爬虫都合规且定义良好！[/bold green]\n准备开始爬取！ 🕷️🚀",
-                title="🎉 检查通过",
-                border_style="green",
-                padding=(1, 2)
-            ))
-            return 0
-    except Exception as e:
-        logger.exception("执行 'crawlo check' 时发生异常")
-        if show_json:
-            console.print_json(data={"success": False, "error": str(e)})
-        elif show_ci:
-            console.print(f"❌ 意外错误: {e}")
-        else:
-            console.print(f"[bold red]❌ 检查过程中发生意外错误:[/bold red] {e}")
-        return 1
-if __name__ == "__main__":
-    """
-    支持直接运行：
-        python -m crawlo.commands.check
-    """
+#!/usr/bin/python
+# -*- coding: UTF-8 -*-
+"""
+# @Time    : 2025-08-31 22:35
+# @Author  : crawl-coder
+# @Desc    : 命令行入口：crawlo check，检查所有爬虫定义是否合规。
+"""
+import sys
+import ast
+import astor
+import re
+import time
+from pathlib import Path
+import configparser
+from importlib import import_module
+from rich.console import Console
+from rich.panel import Panel
+from rich.table import Table
+from rich.text import Text
+from rich import box
+from watchdog.observers import Observer
+from watchdog.events import FileSystemEventHandler
+from crawlo.crawler import CrawlerProcess
+from crawlo.utils.log import get_logger
+logger = get_logger(__name__)
+console = Console()
+def get_project_root():
+    """Locate the project root by searching upward for ``crawlo.cfg``.
+    The search starts at the current working directory and inspects at most
+    ten directory levels, stopping early once the filesystem root is reached.
+    Returns:
+        Path of the first directory that contains ``crawlo.cfg``, or ``None``
+        when no such directory is found.
+    """
+    directory = Path.cwd()
+    levels_left = 10
+    while levels_left > 0:
+        levels_left -= 1
+        if (directory / "crawlo.cfg").exists():
+            return directory
+        parent = directory.parent
+        if parent == directory:
+            # Reached the filesystem root without finding the marker file.
+            return None
+        directory = parent
+    return None
+def _last_class_assign(class_node, attr):
+    """Return the last plain ``attr = ...`` statement in the class body, or None."""
+    found = None
+    for stmt in class_node.body:
+        if isinstance(stmt, ast.Assign) and any(
+            isinstance(target, ast.Name) and target.id == attr for target in stmt.targets
+        ):
+            found = stmt
+    return found
+def _defines_method(class_node, method_name):
+    """Report whether the class body defines ``method_name`` as ``def`` or ``async def``."""
+    return any(
+        isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef)) and stmt.name == method_name
+        for stmt in class_node.body
+    )
+def _wrap_str_value_in_list(assign):
+    """Rewrite ``attr = "x"`` as ``attr = ["x"]``; return True when a change was made."""
+    if assign is None:
+        return False
+    value = assign.value
+    if not (isinstance(value, ast.Constant) and isinstance(value.value, str)):
+        return False
+    assign.value = ast.List(elts=[ast.Constant(value=value.value)], ctx=ast.Load())
+    return True
+def _stub_method(name, arg_names, body):
+    """Build a plain ``def name(<arg_names>)`` AST node with the given body."""
+    return ast.FunctionDef(
+        name=name,
+        args=ast.arguments(
+            posonlyargs=[],
+            args=[ast.arg(arg=arg_name) for arg_name in arg_names],
+            kwonlyargs=[],
+            kw_defaults=[],
+            defaults=[],
+            vararg=None,
+            kwarg=None,
+        ),
+        body=body,
+        decorator_list=[],
+        returns=None,
+    )
+def auto_fix_spider_file(spider_cls, file_path: Path):
+    """Repair common definition problems in a spider's source file.
+    The file is parsed, the class named ``spider_cls.__name__`` is patched at
+    the AST level, and the module is regenerated with ``astor`` only when
+    something changed. Because the file is regenerated from the AST, comments
+    in the original file are not carried over.
+    Repairs applied:
+        1. a missing or empty ``name`` is replaced by a snake_case name
+           derived from the class name;
+        2. ``start_urls`` / ``allowed_domains`` given as a single string are
+           wrapped in a list;
+        3. stub ``parse`` / ``start_requests`` methods and an empty
+           ``custom_settings`` are added, but only when neither the class
+           body nor ``spider_cls`` itself (e.g. through inheritance) already
+           provides them.
+    Args:
+        spider_cls: The spider class whose definition should be repaired.
+        file_path: Path of the source file that defines the class.
+    Returns:
+        A ``(fixed, message)`` tuple; ``fixed`` is True only when the file
+        was rewritten. Errors are reported through ``message``, not raised.
+    """
+    try:
+        with open(file_path, "r", encoding="utf-8") as f:
+            source = f.read()
+        tree = ast.parse(source)
+        class_node = None
+        for node in ast.walk(tree):
+            if isinstance(node, ast.ClassDef) and node.name == spider_cls.__name__:
+                class_node = node
+                break
+        if not class_node:
+            return False, "在文件中找不到类定义。"
+        body = class_node.body
+        fixed = False
+        # 1. Missing or empty ``name``.
+        name_assign = _last_class_assign(class_node, "name")
+        if not name_assign or (
+            isinstance(name_assign.value, ast.Constant) and not name_assign.value.value
+        ):
+            # Default name: class name converted to snake_case, "_spider" dropped.
+            default_name = re.sub(r'(?<!^)(?=[A-Z])', '_', spider_cls.__name__).lower().replace("_spider", "")
+            new_assign = ast.Assign(
+                targets=[ast.Name(id="name", ctx=ast.Store())],
+                value=ast.Constant(value=default_name)
+            )
+            if name_assign:
+                body[body.index(name_assign)] = new_assign
+            else:
+                # Keep the class docstring as the first statement; inserting
+                # ahead of it would turn it into an ordinary expression.
+                has_docstring = ast.get_docstring(class_node, clean=False) is not None
+                body.insert(1 if has_docstring else 0, new_assign)
+            fixed = True
+        # 2. ``start_urls`` / ``allowed_domains`` written as a bare string.
+        for list_attr in ("start_urls", "allowed_domains"):
+            if _wrap_str_value_in_list(_last_class_assign(class_node, list_attr)):
+                fixed = True
+        # 3. Missing ``parse``. ``async def parse`` counts as present, and an
+        # inherited implementation is left alone instead of being shadowed.
+        if not _defines_method(class_node, "parse") and not callable(getattr(spider_cls, "parse", None)):
+            body.append(_stub_method(
+                "parse",
+                ["self", "response"],
+                [
+                    ast.Expr(value=ast.Constant(value="默认 parse 方法，返回 item 或继续请求")),
+                    ast.Pass(),
+                ],
+            ))
+            fixed = True
+        # 4. Missing ``custom_settings`` (only when nothing is inherited either).
+        if _last_class_assign(class_node, "custom_settings") is None and not hasattr(spider_cls, "custom_settings"):
+            settings_assign = ast.Assign(
+                targets=[ast.Name(id="custom_settings", ctx=ast.Store())],
+                value=ast.Dict(keys=[], values=[])
+            )
+            # Place it right after the first ``name`` assignment.
+            insert_index = 1
+            for i, stmt in enumerate(body):
+                if isinstance(stmt, ast.Assign) and any(
+                    isinstance(t, ast.Name) and t.id == "name" for t in stmt.targets
+                ):
+                    insert_index = i + 1
+                    break
+            body.insert(insert_index, settings_assign)
+            fixed = True
+        # 5. Missing ``start_requests`` (only when nothing is inherited either).
+        if not _defines_method(class_node, "start_requests") and not callable(
+            getattr(spider_cls, "start_requests", None)
+        ):
+            make_request_call = ast.Call(
+                func=ast.Attribute(value=ast.Name(id="self", ctx=ast.Load()), attr="make_request", ctx=ast.Load()),
+                args=[ast.Name(id="url", ctx=ast.Load())],
+                keywords=[]
+            )
+            start_requests_method = _stub_method(
+                "start_requests",
+                ["self"],
+                [
+                    ast.Expr(value=ast.Constant(value="默认 start_requests，从 start_urls 生成请求")),
+                    ast.For(
+                        target=ast.Name(id="url", ctx=ast.Store()),
+                        iter=ast.Attribute(value=ast.Name(id="self", ctx=ast.Load()), attr="start_urls", ctx=ast.Load()),
+                        # Yield each request; a bare call would discard it and
+                        # make the generated method return None.
+                        body=[ast.Expr(value=ast.Yield(value=make_request_call))],
+                        orelse=[]
+                    ),
+                ],
+            )
+            # Place it after ``name`` / ``custom_settings`` and before ``parse``.
+            insert_index = 2
+            for i, stmt in enumerate(body):
+                if isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef)) and stmt.name == "parse":
+                    insert_index = i
+                    break
+                elif isinstance(stmt, ast.Assign) and any(
+                    isinstance(t, ast.Name) and t.id in ("name", "custom_settings") for t in stmt.targets
+                ):
+                    insert_index = i + 1
+            body.insert(insert_index, start_requests_method)
+            fixed = True
+        if fixed:
+            fixed_source = astor.to_source(tree)
+            with open(file_path, "w", encoding="utf-8") as f:
+                f.write(fixed_source)
+            return True, "文件自动修复成功。"
+        else:
+            return False, "未找到可修复的问题。"
+    except Exception as e:
+        # Best-effort tool: report the failure to the caller instead of raising.
+        return False, f"自动修复失败: {e}"
+class SpiderChangeHandler(FileSystemEventHandler):
+    """Watchdog handler that re-checks a spider whenever its file is modified.
+    Args:
+        project_root: Project root directory (stored; not read by this class).
+        spider_modules: Module paths handed to ``CrawlerProcess`` for discovery.
+        show_fix: When True, attempt an automatic fix for spiders with issues.
+        console: Rich console used for output; a new one is created if omitted.
+    """
+    def __init__(self, project_root, spider_modules, show_fix=False, console=None):
+        self.project_root = project_root
+        self.spider_modules = spider_modules
+        self.show_fix = show_fix
+        self.console = console or Console()
+    def on_modified(self, event):
+        """Trigger a check when a ``.py`` file under a ``spiders`` path changes."""
+        if event.is_directory:
+            return
+        if event.src_path.endswith(".py") and "spiders" in event.src_path:
+            file_path = Path(event.src_path)
+            # The file stem is used as the spider name to look up.
+            spider_name = file_path.stem
+            self.console.print(f"\n:eyes: [bold blue]检测到变更[/bold blue] [cyan]{file_path}[/cyan]")
+            self.check_and_fix_spider(spider_name)
+    def check_and_fix_spider(self, spider_name):
+        """Run the simplified compliance checks for one spider and print the result.
+        When ``show_fix`` is enabled and issues are found, the spider's source
+        file is passed to ``auto_fix_spider_file``. Any exception is reported
+        on the console rather than propagated to the observer thread.
+        """
+        try:
+            process = CrawlerProcess(spider_modules=self.spider_modules)
+            if spider_name not in process.get_spider_names():
+                self.console.print(f"[yellow]⚠️  {spider_name} 不是已注册的爬虫。[/yellow]")
+                return
+            cls = process.get_spider_class(spider_name)
+            issues = []
+            # Simplified checks.
+            if not getattr(cls, "name", None):
+                issues.append("缺少或为空的 'name' 属性")
+            if not callable(getattr(cls, "start_requests", None)):
+                issues.append("缺少 'start_requests' 方法")
+            if hasattr(cls, "start_urls") and isinstance(cls.start_urls, str):
+                issues.append("'start_urls' 是字符串")
+            if hasattr(cls, "allowed_domains") and isinstance(cls.allowed_domains, str):
+                issues.append("'allowed_domains' 是字符串")
+            try:
+                spider = cls.create_instance(None)
+                if not callable(getattr(spider, "parse", None)):
+                    issues.append("缺少 'parse' 方法")
+            except Exception:
+                issues.append("实例化失败")
+            if not issues:
+                self.console.print(f"[green]✅ {spider_name} 合规。[/green]")
+                return
+            self.console.print(f"[red]❌ {spider_name} 存在问题:[/red]")
+            for issue in issues:
+                self.console.print(f"  • {issue}")
+            if self.show_fix:
+                import inspect
+                # Classes carry no ``__file__`` attribute by default: honour one
+                # if the framework sets it, otherwise ask ``inspect`` for the
+                # file of the defining module.
+                source_file = getattr(cls, "__file__", None) or inspect.getsourcefile(cls)
+                if source_file:
+                    fixed, msg = auto_fix_spider_file(cls, Path(source_file))
+                else:
+                    fixed, msg = False, "找不到源文件"
+                if fixed:
+                    self.console.print(f"[green]✅ 自动修复: {msg}[/green]")
+                else:
+                    self.console.print(f"[yellow]⚠️  无法修复: {msg}[/yellow]")
+        except Exception as e:
+            self.console.print(f"[red]❌ 检查 {spider_name} 时出错: {e}[/red]")
+def watch_spiders(project_root: Path, project_package: str, show_fix: bool):
+    """Watch the project's ``spiders`` directory and re-check spiders on change.
+    Blocks until interrupted with Ctrl+C. Returns immediately, after printing
+    an error, when the spiders directory does not exist.
+    Args:
+        project_root: Project root directory.
+        project_package: Name of the project's top-level package.
+        show_fix: Forwarded to the handler; enables automatic fixing.
+    """
+    spider_path = project_root / project_package / "spiders"
+    if not spider_path.exists():
+        console.print(f"[bold red]❌ Spider 目录未找到:[/bold red] {spider_path}")
+        return
+    spider_modules = [f"{project_package}.spiders"]
+    event_handler = SpiderChangeHandler(project_root, spider_modules, show_fix, console)
+    observer = Observer()
+    observer.schedule(event_handler, str(spider_path), recursive=False)
+    console.print(Panel(
+        f":eyes: [bold blue]监听[/bold blue] [cyan]{spider_path}[/cyan] 中的变更\n"
+        "编辑任何爬虫文件以触发自动检查...",
+        title="🚀 已启动监听模式",
+        border_style="blue"
+    ))
+    observer.start()
+    try:
+        while True:
+            time.sleep(1)
+    except KeyboardInterrupt:
+        console.print("\n[bold red]🛑 监听模式已停止。[/bold red]")
+    finally:
+        # Always shut the observer down, not only on Ctrl+C, so no watcher
+        # is left running if the loop exits through another exception.
+        observer.stop()
+        observer.join()
+def _report_check_failure(msg, title, show_json, show_ci):
+    """Print ``msg`` in the active output mode and return exit code 1."""
+    if show_json:
+        console.print_json(data={"success": False, "error": msg})
+    elif show_ci:
+        console.print(f"❌ {msg}")
+    else:
+        console.print(Panel(msg, title=title, border_style="red"))
+    return 1
+def _spider_source_file(cls):
+    """Return the path of the file defining ``cls``, or None when it is unknown."""
+    import inspect
+    # Classes have no ``__file__`` attribute by default; honour one if the
+    # framework sets it, otherwise fall back to the defining module's file.
+    declared = getattr(cls, "__file__", None)
+    if declared:
+        return declared
+    try:
+        return inspect.getsourcefile(cls)
+    except (TypeError, OSError):
+        return None
+def _collect_spider_issues(cls):
+    """Run the compliance checks on one spider class and return the issue texts."""
+    issues = []
+    # ``name`` must be a non-empty string.
+    if not getattr(cls, "name", None):
+        issues.append("缺少或为空的 'name' 属性")
+    elif not isinstance(cls.name, str):
+        issues.append("'name' 不是字符串")
+    # ``start_requests`` must be callable.
+    if not callable(getattr(cls, "start_requests", None)):
+        issues.append("缺少或不可调用的 'start_requests' 方法")
+    # ``start_urls`` / ``allowed_domains`` must not be a bare string.
+    if hasattr(cls, "start_urls") and isinstance(cls.start_urls, str):
+        issues.append("'start_urls' 是字符串；应为列表或元组")
+    if hasattr(cls, "allowed_domains") and isinstance(cls.allowed_domains, str):
+        issues.append("'allowed_domains' 是字符串；应为列表或元组")
+    # Instantiate the spider and look for a callable ``parse``.
+    try:
+        spider = cls.create_instance(None)
+        if not callable(getattr(spider, "parse", None)):
+            issues.append("未定义 'parse' 方法（推荐）")
+    except Exception as e:
+        issues.append(f"实例化爬虫失败: {e}")
+    return issues
+def main(args):
+    """Check every spider definition in the current project for compliance.
+    Usage:
+        crawlo check
+        crawlo check --fix
+        crawlo check --ci
+        crawlo check --json
+        crawlo check --watch
+    Args:
+        args: Command-line arguments following ``check``.
+    Returns:
+        Exit code: 0 when all spiders pass (or none are found), 1 on invalid
+        arguments, project/configuration errors, or when issues remain.
+    """
+    show_fix = "--fix" in args or "-f" in args
+    show_ci = "--ci" in args
+    show_json = "--json" in args
+    show_watch = "--watch" in args
+    valid_args = {"--fix", "-f", "--ci", "--json", "--watch"}
+    if any(arg not in valid_args for arg in args):
+        console.print("[bold red]❌ 错误:[/bold red] 用法: [blue]crawlo check[/blue] [--fix] [--ci] [--json] [--watch]")
+        return 1
+    # Progress lines are only shown in the default (rich) output mode.
+    verbose = not show_ci and not show_json
+    try:
+        # 1. Locate the project root.
+        project_root = get_project_root()
+        if not project_root:
+            if show_json:
+                console.print_json(data={"success": False, "error": "未找到项目根目录"})
+            elif show_ci:
+                console.print("❌ 未找到项目根目录。缺少 crawlo.cfg。")
+            else:
+                msg = ":cross_mark: [bold red]找不到 'crawlo.cfg'[/bold red]\n💡 请在项目目录中运行此命令。"
+                console.print(Panel(
+                    Text.from_markup(msg),
+                    title="❌ 非Crawlo项目",
+                    border_style="red",
+                    padding=(1, 2)
+                ))
+            return 1
+        project_root_str = str(project_root)
+        if project_root_str not in sys.path:
+            sys.path.insert(0, project_root_str)
+        # 2. Read crawlo.cfg.
+        cfg_file = project_root / "crawlo.cfg"
+        if not cfg_file.exists():
+            return _report_check_failure(f"配置文件未找到: {cfg_file}", "❌ 缺少配置文件", show_json, show_ci)
+        config = configparser.ConfigParser()
+        config.read(cfg_file, encoding="utf-8")
+        if not config.has_section("settings") or not config.has_option("settings", "default"):
+            return _report_check_failure(
+                "crawlo.cfg 中缺少 [settings] 部分或 'default' 选项", "❌ 无效配置", show_json, show_ci
+            )
+        settings_module = config.get("settings", "default")
+        project_package = settings_module.split(".")[0]
+        # 3. Make sure the project package is importable.
+        try:
+            import_module(project_package)
+        except ImportError as e:
+            return _report_check_failure(
+                f"导入项目包 '{project_package}' 失败: {e}", "❌ 导入错误", show_json, show_ci
+            )
+        # 4. Load the spiders.
+        spider_modules = [f"{project_package}.spiders"]
+        process = CrawlerProcess(spider_modules=spider_modules)
+        spider_names = process.get_spider_names()
+        if not spider_names:
+            # An empty project is reported as a warning, not a failure.
+            if show_json:
+                console.print_json(data={"success": True, "warning": "未找到爬虫。"})
+            elif show_ci:
+                console.print("📭 未找到爬虫。")
+            else:
+                console.print(Panel(
+                    Text.from_markup(
+                        ":envelope_with_arrow: [bold]未找到爬虫[/bold]\n\n"
+                        "[bold]💡 确保:[/bold]\n"
+                        "  • 爬虫定义于 '[cyan]spiders[/cyan]' 模块\n"
+                        "  • 具有 [green]`name`[/green] 属性\n"
+                        "  • 模块已正确导入"
+                    ),
+                    title="📭 未找到爬虫",
+                    border_style="yellow",
+                    padding=(1, 2)
+                ))
+            return 0
+        # 5. Watch mode: hand over to the file watcher (blocks until Ctrl+C).
+        if show_watch:
+            console.print("[bold blue]:eyes: 启动监听模式...[/bold blue]")
+            watch_spiders(project_root, project_package, show_fix)
+            return 0
+        # 6. One-shot check.
+        if verbose:
+            console.print(f":mag: [bold]正在检查 {len(spider_names)} 个爬虫...[/bold]\n")
+        issues_found = False
+        results = []
+        for name in sorted(spider_names):
+            cls = process.get_spider_class(name)
+            issues = _collect_spider_issues(cls)
+            source_file = _spider_source_file(cls)
+            # Optional auto-fix.
+            if issues and show_fix:
+                if source_file is None:
+                    if verbose:
+                        console.print(f"[yellow]⚠️  找不到 {name} 的源文件[/yellow]")
+                else:
+                    try:
+                        fixed, msg = auto_fix_spider_file(cls, Path(source_file))
+                        if fixed:
+                            if verbose:
+                                console.print(f"[green]🔧 已自动修复 {name} → {msg}[/green]")
+                            # NOTE(review): every issue is treated as resolved
+                            # once the file was rewritten, including ones the
+                            # fixer may not address (e.g. instantiation errors).
+                            issues = []
+                        elif verbose:
+                            console.print(f"[yellow]⚠️  无法自动修复 {name}: {msg}[/yellow]")
+                    except Exception as e:
+                        if verbose:
+                            console.print(f"[yellow]⚠️  找不到 {name} 的源文件: {e}[/yellow]")
+            results.append({
+                "name": name,
+                "class": cls.__name__,
+                "file": str(source_file) if source_file else "unknown",
+                "issues": issues
+            })
+            if issues:
+                issues_found = True
+        exit_code = 1 if issues_found else 0
+        # 7./8. Machine-readable outputs.
+        if show_json:
+            console.print_json(data={
+                "success": not issues_found,
+                "total_spiders": len(spider_names),
+                "issues": [
+                    {"name": r["name"], "class": r["class"], "file": r["file"], "problems": r["issues"]}
+                    for r in results if r["issues"]
+                ]
+            })
+            return exit_code
+        if show_ci:
+            if issues_found:
+                console.print("❌ 合规性检查失败。")
+                for r in results:
+                    if r["issues"]:
+                        console.print(f"  • {r['name']}: {', '.join(r['issues'])}")
+            else:
+                console.print("✅ 所有爬虫合规。")
+            return exit_code
+        # 9. Default rich output.
+        table = Table(
+            title="🔍 爬虫合规性检查结果",
+            box=box.ROUNDED,
+            show_header=True,
+            header_style="bold magenta",
+            title_style="bold green"
+        )
+        table.add_column("状态", style="bold", width=4)
+        table.add_column("名称", style="cyan")
+        table.add_column("类名", style="green")
+        table.add_column("问题", style="yellow", overflow="fold")
+        for res in results:
+            if res["issues"]:
+                status = "[red]❌[/red]"
+                issues_text = "\n".join(f"• {issue}" for issue in res["issues"])
+            else:
+                status = "[green]✅[/green]"
+                issues_text = "—"
+            table.add_row(status, res["name"], res["class"], issues_text)
+        console.print(table)
+        console.print()
+        if issues_found:
+            console.print(Panel(
+                ":warning: [bold red]一些爬虫存在问题。[/bold red]\n请在运行前修复这些问题。",
+                title="⚠️  合规性检查失败",
+                border_style="red",
+                padding=(1, 2)
+            ))
+        else:
+            console.print(Panel(
+                ":tada: [bold green]所有爬虫都合规且定义良好！[/bold green]\n准备开始爬取！ 🕷️🚀",
+                title="🎉 检查通过",
+                border_style="green",
+                padding=(1, 2)
+            ))
+        return exit_code
+    except Exception as e:
+        # Top-level boundary of the command: log the traceback, report, exit 1.
+        logger.exception("执行 'crawlo check' 时发生异常")
+        if show_json:
+            console.print_json(data={"success": False, "error": str(e)})
+        elif show_ci:
+            console.print(f"❌ 意外错误: {e}")
+        else:
+            console.print(f"[bold red]❌ 检查过程中发生意外错误:[/bold red] {e}")
+        return 1
+# Script entry point: run the check with the command-line arguments and use
+# main()'s return value as the process exit code.
+if __name__ == "__main__":
+    """
+    支持直接运行：
+        python -m crawlo.commands.check
+    """
     sys.exit(main(sys.argv[1:]))

crawlo 1.2.5__py3-none-any.whl → 1.2.7__py3-none-any.whl

Potentially problematic release.

crawlo 1.2.5py3-none-any.whl → 1.2.7py3-none-any.whl