crawlo-1.3.2-py3-none-any.whl → crawlo-1.3.3-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in a supported public registry. It is provided for informational purposes only.

Potentially problematic release.

Files changed (219)
  1. crawlo/__init__.py +63 -63
  2. crawlo/__version__.py +1 -1
  3. crawlo/cli.py +75 -75
  4. crawlo/commands/__init__.py +14 -14
  5. crawlo/commands/check.py +594 -594
  6. crawlo/commands/genspider.py +151 -151
  7. crawlo/commands/help.py +138 -138
  8. crawlo/commands/list.py +155 -155
  9. crawlo/commands/run.py +322 -314
  10. crawlo/commands/startproject.py +436 -436
  11. crawlo/commands/stats.py +187 -187
  12. crawlo/commands/utils.py +196 -196
  13. crawlo/config.py +312 -312
  14. crawlo/config_validator.py +277 -277
  15. crawlo/core/__init__.py +2 -2
  16. crawlo/core/engine.py +365 -365
  17. crawlo/core/processor.py +40 -40
  18. crawlo/core/scheduler.py +256 -256
  19. crawlo/crawler.py +1166 -1168
  20. crawlo/data/__init__.py +5 -5
  21. crawlo/data/user_agents.py +194 -194
  22. crawlo/downloader/__init__.py +273 -273
  23. crawlo/downloader/aiohttp_downloader.py +226 -226
  24. crawlo/downloader/cffi_downloader.py +245 -245
  25. crawlo/downloader/httpx_downloader.py +259 -259
  26. crawlo/downloader/hybrid_downloader.py +212 -212
  27. crawlo/downloader/playwright_downloader.py +402 -402
  28. crawlo/downloader/selenium_downloader.py +472 -472
  29. crawlo/event.py +11 -11
  30. crawlo/exceptions.py +81 -81
  31. crawlo/extension/__init__.py +39 -39
  32. crawlo/extension/health_check.py +141 -141
  33. crawlo/extension/log_interval.py +57 -57
  34. crawlo/extension/log_stats.py +81 -81
  35. crawlo/extension/logging_extension.py +52 -45
  36. crawlo/extension/memory_monitor.py +104 -104
  37. crawlo/extension/performance_profiler.py +133 -133
  38. crawlo/extension/request_recorder.py +107 -107
  39. crawlo/filters/__init__.py +154 -154
  40. crawlo/filters/aioredis_filter.py +234 -234
  41. crawlo/filters/memory_filter.py +269 -269
  42. crawlo/items/__init__.py +23 -23
  43. crawlo/items/base.py +21 -21
  44. crawlo/items/fields.py +52 -52
  45. crawlo/items/items.py +104 -104
  46. crawlo/middleware/__init__.py +21 -21
  47. crawlo/middleware/default_header.py +132 -132
  48. crawlo/middleware/download_delay.py +104 -104
  49. crawlo/middleware/middleware_manager.py +135 -135
  50. crawlo/middleware/offsite.py +123 -123
  51. crawlo/middleware/proxy.py +386 -386
  52. crawlo/middleware/request_ignore.py +86 -86
  53. crawlo/middleware/response_code.py +163 -163
  54. crawlo/middleware/response_filter.py +136 -136
  55. crawlo/middleware/retry.py +124 -124
  56. crawlo/middleware/simple_proxy.py +65 -65
  57. crawlo/mode_manager.py +187 -187
  58. crawlo/network/__init__.py +21 -21
  59. crawlo/network/request.py +379 -379
  60. crawlo/network/response.py +359 -359
  61. crawlo/pipelines/__init__.py +21 -21
  62. crawlo/pipelines/bloom_dedup_pipeline.py +156 -156
  63. crawlo/pipelines/console_pipeline.py +39 -39
  64. crawlo/pipelines/csv_pipeline.py +316 -316
  65. crawlo/pipelines/database_dedup_pipeline.py +222 -222
  66. crawlo/pipelines/json_pipeline.py +218 -218
  67. crawlo/pipelines/memory_dedup_pipeline.py +115 -115
  68. crawlo/pipelines/mongo_pipeline.py +131 -131
  69. crawlo/pipelines/mysql_pipeline.py +318 -318
  70. crawlo/pipelines/pipeline_manager.py +75 -75
  71. crawlo/pipelines/redis_dedup_pipeline.py +166 -166
  72. crawlo/project.py +325 -297
  73. crawlo/queue/pqueue.py +37 -37
  74. crawlo/queue/queue_manager.py +379 -379
  75. crawlo/queue/redis_priority_queue.py +306 -306
  76. crawlo/settings/__init__.py +7 -7
  77. crawlo/settings/default_settings.py +225 -225
  78. crawlo/settings/setting_manager.py +198 -198
  79. crawlo/spider/__init__.py +639 -639
  80. crawlo/stats_collector.py +59 -59
  81. crawlo/subscriber.py +129 -129
  82. crawlo/task_manager.py +30 -30
  83. crawlo/templates/crawlo.cfg.tmpl +10 -10
  84. crawlo/templates/project/__init__.py.tmpl +3 -3
  85. crawlo/templates/project/items.py.tmpl +17 -17
  86. crawlo/templates/project/middlewares.py.tmpl +118 -118
  87. crawlo/templates/project/pipelines.py.tmpl +96 -96
  88. crawlo/templates/project/settings.py.tmpl +266 -266
  89. crawlo/templates/project/settings_distributed.py.tmpl +179 -179
  90. crawlo/templates/project/settings_gentle.py.tmpl +60 -60
  91. crawlo/templates/project/settings_high_performance.py.tmpl +130 -130
  92. crawlo/templates/project/settings_minimal.py.tmpl +34 -34
  93. crawlo/templates/project/settings_simple.py.tmpl +101 -101
  94. crawlo/templates/project/spiders/__init__.py.tmpl +5 -5
  95. crawlo/templates/run.py.tmpl +38 -38
  96. crawlo/templates/spider/spider.py.tmpl +143 -143
  97. crawlo/tools/__init__.py +200 -200
  98. crawlo/tools/anti_crawler.py +268 -268
  99. crawlo/tools/authenticated_proxy.py +240 -240
  100. crawlo/tools/data_formatter.py +225 -225
  101. crawlo/tools/data_validator.py +180 -180
  102. crawlo/tools/date_tools.py +289 -289
  103. crawlo/tools/distributed_coordinator.py +388 -388
  104. crawlo/tools/encoding_converter.py +127 -127
  105. crawlo/tools/request_tools.py +82 -82
  106. crawlo/tools/retry_mechanism.py +224 -224
  107. crawlo/tools/scenario_adapter.py +262 -262
  108. crawlo/tools/text_cleaner.py +232 -232
  109. crawlo/utils/__init__.py +34 -34
  110. crawlo/utils/batch_processor.py +259 -259
  111. crawlo/utils/controlled_spider_mixin.py +439 -439
  112. crawlo/utils/db_helper.py +343 -343
  113. crawlo/utils/enhanced_error_handler.py +356 -356
  114. crawlo/utils/env_config.py +142 -142
  115. crawlo/utils/error_handler.py +123 -123
  116. crawlo/utils/func_tools.py +82 -82
  117. crawlo/utils/large_scale_config.py +286 -286
  118. crawlo/utils/large_scale_helper.py +344 -344
  119. crawlo/utils/log.py +199 -146
  120. crawlo/utils/performance_monitor.py +285 -285
  121. crawlo/utils/queue_helper.py +175 -175
  122. crawlo/utils/redis_connection_pool.py +351 -351
  123. crawlo/utils/redis_key_validator.py +198 -198
  124. crawlo/utils/request.py +267 -267
  125. crawlo/utils/request_serializer.py +218 -218
  126. crawlo/utils/spider_loader.py +61 -61
  127. crawlo/utils/system.py +11 -11
  128. crawlo/utils/tools.py +4 -4
  129. crawlo/utils/url.py +39 -39
  130. {crawlo-1.3.2.dist-info → crawlo-1.3.3.dist-info}/METADATA +1020 -1020
  131. crawlo-1.3.3.dist-info/RECORD +219 -0
  132. examples/__init__.py +7 -7
  133. tests/DOUBLE_CRAWLO_PREFIX_FIX_REPORT.md +81 -81
  134. tests/__init__.py +7 -7
  135. tests/advanced_tools_example.py +275 -275
  136. tests/authenticated_proxy_example.py +107 -107
  137. tests/cleaners_example.py +160 -160
  138. tests/config_validation_demo.py +142 -142
  139. tests/controlled_spider_example.py +205 -205
  140. tests/date_tools_example.py +180 -180
  141. tests/debug_pipelines.py +66 -66
  142. tests/dynamic_loading_example.py +523 -523
  143. tests/dynamic_loading_test.py +104 -104
  144. tests/env_config_example.py +133 -133
  145. tests/error_handling_example.py +171 -171
  146. tests/redis_key_validation_demo.py +130 -130
  147. tests/request_params_example.py +150 -150
  148. tests/response_improvements_example.py +144 -144
  149. tests/test_advanced_tools.py +148 -148
  150. tests/test_all_redis_key_configs.py +145 -145
  151. tests/test_authenticated_proxy.py +141 -141
  152. tests/test_cleaners.py +54 -54
  153. tests/test_comprehensive.py +146 -146
  154. tests/test_config_consistency.py +80 -80
  155. tests/test_config_merge.py +152 -152
  156. tests/test_config_validator.py +182 -182
  157. tests/test_crawlo_proxy_integration.py +108 -108
  158. tests/test_date_tools.py +123 -123
  159. tests/test_default_header_middleware.py +158 -158
  160. tests/test_distributed.py +65 -65
  161. tests/test_double_crawlo_fix.py +207 -207
  162. tests/test_double_crawlo_fix_simple.py +124 -124
  163. tests/test_download_delay_middleware.py +221 -221
  164. tests/test_downloader_proxy_compatibility.py +268 -268
  165. tests/test_dynamic_downloaders_proxy.py +124 -124
  166. tests/test_dynamic_proxy.py +92 -92
  167. tests/test_dynamic_proxy_config.py +146 -146
  168. tests/test_dynamic_proxy_real.py +109 -109
  169. tests/test_edge_cases.py +303 -303
  170. tests/test_enhanced_error_handler.py +270 -270
  171. tests/test_env_config.py +121 -121
  172. tests/test_error_handler_compatibility.py +112 -112
  173. tests/test_final_validation.py +153 -153
  174. tests/test_framework_env_usage.py +103 -103
  175. tests/test_integration.py +169 -169
  176. tests/test_item_dedup_redis_key.py +122 -122
  177. tests/test_mode_consistency.py +51 -51
  178. tests/test_offsite_middleware.py +221 -221
  179. tests/test_parsel.py +29 -29
  180. tests/test_performance.py +327 -327
  181. tests/test_proxy_api.py +264 -264
  182. tests/test_proxy_health_check.py +32 -32
  183. tests/test_proxy_middleware.py +121 -121
  184. tests/test_proxy_middleware_enhanced.py +216 -216
  185. tests/test_proxy_middleware_integration.py +136 -136
  186. tests/test_proxy_middleware_refactored.py +184 -184
  187. tests/test_proxy_providers.py +56 -56
  188. tests/test_proxy_stats.py +19 -19
  189. tests/test_proxy_strategies.py +59 -59
  190. tests/test_queue_manager_double_crawlo.py +173 -173
  191. tests/test_queue_manager_redis_key.py +176 -176
  192. tests/test_random_user_agent.py +72 -72
  193. tests/test_real_scenario_proxy.py +195 -195
  194. tests/test_redis_config.py +28 -28
  195. tests/test_redis_connection_pool.py +294 -294
  196. tests/test_redis_key_naming.py +181 -181
  197. tests/test_redis_key_validator.py +123 -123
  198. tests/test_redis_queue.py +224 -224
  199. tests/test_request_ignore_middleware.py +182 -182
  200. tests/test_request_params.py +111 -111
  201. tests/test_request_serialization.py +70 -70
  202. tests/test_response_code_middleware.py +349 -349
  203. tests/test_response_filter_middleware.py +427 -427
  204. tests/test_response_improvements.py +152 -152
  205. tests/test_retry_middleware.py +241 -241
  206. tests/test_scheduler.py +252 -252
  207. tests/test_scheduler_config_update.py +133 -133
  208. tests/test_simple_response.py +61 -61
  209. tests/test_telecom_spider_redis_key.py +205 -205
  210. tests/test_template_content.py +87 -87
  211. tests/test_template_redis_key.py +134 -134
  212. tests/test_tools.py +159 -159
  213. tests/test_user_agents.py +96 -96
  214. tests/tools_example.py +260 -260
  215. tests/verify_distributed.py +117 -117
  216. crawlo-1.3.2.dist-info/RECORD +0 -219
  217. {crawlo-1.3.2.dist-info → crawlo-1.3.3.dist-info}/WHEEL +0 -0
  218. {crawlo-1.3.2.dist-info → crawlo-1.3.3.dist-info}/entry_points.txt +0 -0
  219. {crawlo-1.3.2.dist-info → crawlo-1.3.3.dist-info}/top_level.txt +0 -0
crawlo/commands/genspider.py CHANGED
@@ -1,152 +1,152 @@
(Lines 1-151 were removed and re-added with identical content; line 152, "return 1", is shared context. The file content in both versions:)

#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
# @Time   : 2025-08-31 22:36
# @Author : crawl-coder
# @Desc   : CLI entry point: crawlo genspider baidu, creates a spider.
"""
import sys
from pathlib import Path
import configparser
import importlib
from rich.console import Console

from .utils import (
    get_project_root,
    validate_project_environment,
    show_error_panel,
    show_success_panel,
    validate_spider_name,
    is_valid_domain
)

# Initialize the rich console
console = Console()

TEMPLATES_DIR = Path(__file__).parent.parent / 'templates'


def _render_template(tmpl_path, context):
    """Read the template file and replace each {{key}} with its value from context."""
    with open(tmpl_path, 'r', encoding='utf-8') as f:
        content = f.read()
    for key, value in context.items():
        content = content.replace(f'{{{{{key}}}}}', str(value))
    return content


def main(args):
    if len(args) < 2:
        console.print("[bold red]Error:[/bold red] Usage: [blue]crawlo genspider[/blue] <spider_name> <domain>")
        console.print("Examples:")
        console.print("  [blue]crawlo genspider[/blue] news_spider news.example.com")
        console.print("  [blue]crawlo genspider[/blue] product_spider shop.example.com")
        return 1

    spider_name = args[0]
    domain = args[1]

    # Validate the spider name
    if not validate_spider_name(spider_name):
        show_error_panel(
            "Invalid spider name",
            f"Spider name '[cyan]{spider_name}[/cyan]' is invalid.\n"
            "A spider name should:\n"
            "  • start with a lowercase letter\n"
            "  • contain only lowercase letters, digits, and underscores\n"
            "  • be a valid Python identifier"
        )
        return 1

    # Validate the domain format
    if not is_valid_domain(domain):
        show_error_panel(
            "Invalid domain",
            f"Domain '[cyan]{domain}[/cyan]' has an invalid format.\n"
            "Please provide a valid domain such as 'example.com'"
        )
        return 1

    # Validate the project environment
    is_valid, project_package, error_msg = validate_project_environment()
    if not is_valid:
        show_error_panel("Not a Crawlo project", error_msg)
        return 1

    project_root = get_project_root()

    # Determine the path of the items module
    items_module_path = f"{project_package}.items"

    # Try to import the items module
    default_item_class = "ExampleItem"  # default fallback
    try:
        items_module = importlib.import_module(items_module_path)
        # Collect every class in the module whose name starts with an uppercase letter
        item_classes = [
            cls for cls in items_module.__dict__.values()
            if isinstance(cls, type) and cls.__name__[0].isupper()  # capitalized name
        ]

        if item_classes:
            default_item_class = item_classes[0].__name__
        else:
            console.print("[yellow]Warning:[/yellow] No item classes found in [cyan]items.py[/cyan]; using [green]ExampleItem[/green].")

    except ImportError as e:
        console.print(f"[yellow]Warning:[/yellow] Failed to import [cyan]{items_module_path}[/cyan]: {e}")
        # Keep the default ExampleItem and continue

    # Create the spider file
    spiders_dir = project_root / project_package / 'spiders'
    spiders_dir.mkdir(parents=True, exist_ok=True)

    spider_file = spiders_dir / f'{spider_name}.py'
    if spider_file.exists():
        show_error_panel(
            "Spider already exists",
            f"Spider '[cyan]{spider_name}[/cyan]' already exists at\n[green]{spider_file}[/green]"
        )
        return 1

    # Template path
    tmpl_path = TEMPLATES_DIR / 'spider' / 'spider.py.tmpl'
    if not tmpl_path.exists():
        show_error_panel(
            "Template not found",
            f"Template file not found at [cyan]{tmpl_path}[/cyan]"
        )
        return 1

    # Generate the class name
    class_name = f"{spider_name.replace('_', '').capitalize()}Spider"

    context = {
        'spider_name': spider_name,
        'domain': domain,
        'project_name': project_package,
        'item_class': default_item_class,
        'class_name': class_name
    }

    try:
        content = _render_template(tmpl_path, context)
        with open(spider_file, 'w', encoding='utf-8') as f:
            f.write(content)

        console.print(f"[green]Spider '[bold]{spider_name}[/bold]' created successfully![/green]")
        console.print(f"  → Location: [cyan]{spider_file}[/cyan]")
        console.print(f"  → Class name: [yellow]{class_name}[/yellow]")
        console.print(f"  → Domain: [blue]{domain}[/blue]")
        console.print("\n[bold]Next steps:[/bold]")
        console.print(f"  [blue]crawlo run[/blue] {spider_name}")
        console.print(f"  [blue]crawlo check[/blue] {spider_name}")

        return 0

    except Exception as e:
        show_error_panel(
            "Creation failed",
            f"Failed to create spider: {e}"
        )
        return 1
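
The {{key}} substitution in _render_template is plain string replacement, so it can be exercised on its own. Below is a minimal sketch of the same replace loop applied to a hypothetical one-line template (the template text and context values are illustrative, not the contents of spider.py.tmpl); note how the class-name rule turns news_spider into NewsspiderSpider:

# A standalone sketch of _render_template's placeholder substitution.
# The template string below is hypothetical, not the real spider.py.tmpl.
spider_name = 'news_spider'
context = {
    'spider_name': spider_name,
    'domain': 'news.example.com',
    # Same class-name rule as genspider: drop underscores, capitalize, append "Spider".
    'class_name': f"{spider_name.replace('_', '').capitalize()}Spider",
}
content = "class {{class_name}}(Spider):\n    name = '{{spider_name}}'  # {{domain}}"
for key, value in context.items():
    content = content.replace(f'{{{{{key}}}}}', str(value))  # f'{{{{...}}}}' renders a literal {{...}}
print(content)
# class NewsspiderSpider(Spider):
#     name = 'news_spider'  # news.example.com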
crawlo/commands/help.py CHANGED
@@ -1,139 +1,139 @@
(Lines 1-138 were removed and re-added with identical content; line 139 is shared context. The file content in both versions:)

#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
# @Time   : 2025-09-12
# @Author : crawl-coder
# @Desc   : CLI entry point: crawlo -h|--help, prints help information.
"""
import sys
from rich.console import Console
from rich.table import Table
from rich.panel import Panel
from rich.text import Text
from rich import box
from crawlo.utils.env_config import get_version

# Get the framework version number
VERSION = get_version()

console = Console()


def main(args):
    """
    Main entry: print help information.
    Usage:
        crawlo -h|--help
    """
    # Check for invalid arguments
    if args and args[0] not in ['-h', '--help', 'help']:
        console.print("[bold red]Invalid argument:[/bold red] [yellow]{}[/yellow]".format(args[0]))
        console.print("[bold blue]Hint:[/bold blue] use [green]crawlo -h[/green] or [green]crawlo --help[/green] to view the help")
        return 1

    # Print the help
    show_help()
    return 0


def show_help():
    """Print the complete help information."""
    # Show the framework title and version
    console.print(Panel(
        Text.from_markup(f"[bold blue]Crawlo[/bold blue] [bold white]v{VERSION}[/bold white] - asynchronous crawler framework"),
        expand=False,
        border_style="blue"
    ))

    # Basic usage
    console.print("[bold green]Basic usage:[/bold green]")
    console.print("  [blue]crawlo[/blue] [cyan]<command>[/cyan] [options]")
    console.print()

    # Available commands
    console.print("[bold green]Available commands:[/bold green]")
    table = Table(box=box.SIMPLE, show_header=True, header_style="bold magenta")
    table.add_column("Command", style="cyan", width=15)
    table.add_column("Description", style="white")
    table.add_column("Usage", style="yellow")

    table.add_row("startproject", "Create a new project", "crawlo startproject <project_name>")
    table.add_row("genspider", "Generate a spider template", "crawlo genspider <spider_name> [domain]")
    table.add_row("run", "Run spiders", "crawlo run <spider_name>|all [options]")
    table.add_row("check", "Check spider code", "crawlo check [options]")
    table.add_row("list", "List all spiders", "crawlo list")
    table.add_row("stats", "Show statistics", "crawlo stats [spider_name]")
    table.add_row("help", "Show help information", "crawlo -h|--help")

    console.print(table)
    console.print()

    # Global options
    console.print("[bold green]Global options:[/bold green]")
    table = Table(box=box.SIMPLE, show_header=False)
    table.add_column("Option", style="cyan", width=15)
    table.add_column("Description", style="white")

    table.add_row("-h, --help", "Show help information")
    table.add_row("-v, --version", "Show version information")

    console.print(table)
    console.print()

    # Detailed usage for each command
    console.print("[bold green]Detailed command usage:[/bold green]")

    # run command
    console.print("[bold cyan]run[/bold cyan] - run spiders")
    console.print("  Usage: crawlo run <spider_name>|all [--json] [--no-stats]")
    console.print("  Examples:")
    console.print("    crawlo run myspider")
    console.print("    crawlo run all")
    console.print("    crawlo run all --json --no-stats")
    console.print()


    # check command
    console.print("[bold cyan]check[/bold cyan] - check spider code")
    console.print("  Usage: crawlo check [--fix] [--ci] [--json] [--watch]")
    console.print("  Examples:")
    console.print("    crawlo check")
    console.print("    crawlo check --fix")
    console.print("    crawlo check --ci --json")
    console.print()

    # startproject command
    console.print("[bold cyan]startproject[/bold cyan] - create a new project")
    console.print("  Usage: crawlo startproject <project_name>")
    console.print("  Examples:")
    console.print("    crawlo startproject myproject")
    console.print()

    # genspider command
    console.print("[bold cyan]genspider[/bold cyan] - generate a spider template")
    console.print("  Usage: crawlo genspider <spider_name> [domain]")
    console.print("  Examples:")
    console.print("    crawlo genspider myspider example.com")
    console.print()

    # list command
    console.print("[bold cyan]list[/bold cyan] - list all spiders")
    console.print("  Usage: crawlo list")
    console.print("  Examples:")
    console.print("    crawlo list")
    console.print()

    # stats command
    console.print("[bold cyan]stats[/bold cyan] - show statistics")
    console.print("  Usage: crawlo stats [spider_name]")
    console.print("  Examples:")
    console.print("    crawlo stats")
    console.print("    crawlo stats myspider")
    console.print()

    # More information
    # console.print("[bold green]More information:[/bold green]")
    # console.print("  Docs: https://crawlo.readthedocs.io/")
    # console.print("  Source: https://github.com/crawl-coder/Crawlo")
    # console.print("  Issues: https://github.com/crawl-coder/Crawlo/issues")
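
Given the flag check in main, the command succeeds only for the recognized help arguments and fails for anything else. A quick behavior sketch, assuming crawlo is installed (the import path matches this file's location in the wheel):

# Behavior sketch for crawlo.commands.help.main; assumes `pip install crawlo`.
from crawlo.commands.help import main

assert main([]) == 0          # no arguments: show_help() runs and returns 0
assert main(['--help']) == 0  # '-h', '--help', and 'help' are all accepted
assert main(['bogus']) == 1   # any other first argument prints a hint and fails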