PyPI - jarvis-ai-assistant - Versions diffs - 0.7.16__py3-none-any.whl → 1.0.2__py3-none-any.whl - Mend

jarvis-ai-assistant 0.7.16py3-none-any.whl → 1.0.2py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (279) hide show

jarvis/__init__.py +1 -1
jarvis/jarvis_agent/__init__.py +567 -222
jarvis/jarvis_agent/agent_manager.py +19 -12
jarvis/jarvis_agent/builtin_input_handler.py +79 -11
jarvis/jarvis_agent/config_editor.py +7 -2
jarvis/jarvis_agent/event_bus.py +24 -13
jarvis/jarvis_agent/events.py +19 -1
jarvis/jarvis_agent/file_context_handler.py +67 -64
jarvis/jarvis_agent/file_methodology_manager.py +38 -24
jarvis/jarvis_agent/jarvis.py +186 -114
jarvis/jarvis_agent/language_extractors/__init__.py +8 -1
jarvis/jarvis_agent/language_extractors/c_extractor.py +7 -4
jarvis/jarvis_agent/language_extractors/cpp_extractor.py +9 -4
jarvis/jarvis_agent/language_extractors/go_extractor.py +7 -4
jarvis/jarvis_agent/language_extractors/java_extractor.py +27 -20
jarvis/jarvis_agent/language_extractors/javascript_extractor.py +22 -17
jarvis/jarvis_agent/language_extractors/python_extractor.py +7 -4
jarvis/jarvis_agent/language_extractors/rust_extractor.py +7 -4
jarvis/jarvis_agent/language_extractors/typescript_extractor.py +22 -17
jarvis/jarvis_agent/language_support_info.py +250 -219
jarvis/jarvis_agent/main.py +19 -23
jarvis/jarvis_agent/memory_manager.py +9 -6
jarvis/jarvis_agent/methodology_share_manager.py +21 -15
jarvis/jarvis_agent/output_handler.py +4 -2
jarvis/jarvis_agent/prompt_builder.py +7 -6
jarvis/jarvis_agent/prompt_manager.py +113 -8
jarvis/jarvis_agent/prompts.py +317 -85
jarvis/jarvis_agent/protocols.py +5 -2
jarvis/jarvis_agent/run_loop.py +192 -32
jarvis/jarvis_agent/session_manager.py +7 -3
jarvis/jarvis_agent/share_manager.py +23 -13
jarvis/jarvis_agent/shell_input_handler.py +12 -8
jarvis/jarvis_agent/stdio_redirect.py +25 -26
jarvis/jarvis_agent/task_analyzer.py +29 -23
jarvis/jarvis_agent/task_list.py +869 -0
jarvis/jarvis_agent/task_manager.py +26 -23
jarvis/jarvis_agent/tool_executor.py +6 -5
jarvis/jarvis_agent/tool_share_manager.py +24 -14
jarvis/jarvis_agent/user_interaction.py +3 -3
jarvis/jarvis_agent/utils.py +9 -1
jarvis/jarvis_agent/web_bridge.py +37 -17
jarvis/jarvis_agent/web_output_sink.py +5 -2
jarvis/jarvis_agent/web_server.py +165 -36
jarvis/jarvis_c2rust/__init__.py +1 -1
jarvis/jarvis_c2rust/cli.py +260 -141
jarvis/jarvis_c2rust/collector.py +37 -18
jarvis/jarvis_c2rust/constants.py +60 -0
jarvis/jarvis_c2rust/library_replacer.py +242 -1010
jarvis/jarvis_c2rust/library_replacer_checkpoint.py +133 -0
jarvis/jarvis_c2rust/library_replacer_llm.py +287 -0
jarvis/jarvis_c2rust/library_replacer_loader.py +191 -0
jarvis/jarvis_c2rust/library_replacer_output.py +134 -0
jarvis/jarvis_c2rust/library_replacer_prompts.py +124 -0
jarvis/jarvis_c2rust/library_replacer_utils.py +188 -0
jarvis/jarvis_c2rust/llm_module_agent.py +98 -1044
jarvis/jarvis_c2rust/llm_module_agent_apply.py +170 -0
jarvis/jarvis_c2rust/llm_module_agent_executor.py +288 -0
jarvis/jarvis_c2rust/llm_module_agent_loader.py +170 -0
jarvis/jarvis_c2rust/llm_module_agent_prompts.py +268 -0
jarvis/jarvis_c2rust/llm_module_agent_types.py +57 -0
jarvis/jarvis_c2rust/llm_module_agent_utils.py +150 -0
jarvis/jarvis_c2rust/llm_module_agent_validator.py +119 -0
jarvis/jarvis_c2rust/loaders.py +28 -10
jarvis/jarvis_c2rust/models.py +5 -2
jarvis/jarvis_c2rust/optimizer.py +192 -1974
jarvis/jarvis_c2rust/optimizer_build_fix.py +286 -0
jarvis/jarvis_c2rust/optimizer_clippy.py +766 -0
jarvis/jarvis_c2rust/optimizer_config.py +49 -0
jarvis/jarvis_c2rust/optimizer_docs.py +183 -0
jarvis/jarvis_c2rust/optimizer_options.py +48 -0
jarvis/jarvis_c2rust/optimizer_progress.py +469 -0
jarvis/jarvis_c2rust/optimizer_report.py +52 -0
jarvis/jarvis_c2rust/optimizer_unsafe.py +309 -0
jarvis/jarvis_c2rust/optimizer_utils.py +469 -0
jarvis/jarvis_c2rust/optimizer_visibility.py +185 -0
jarvis/jarvis_c2rust/scanner.py +229 -166
jarvis/jarvis_c2rust/transpiler.py +531 -2732
jarvis/jarvis_c2rust/transpiler_agents.py +503 -0
jarvis/jarvis_c2rust/transpiler_build.py +1294 -0
jarvis/jarvis_c2rust/transpiler_codegen.py +204 -0
jarvis/jarvis_c2rust/transpiler_compile.py +146 -0
jarvis/jarvis_c2rust/transpiler_config.py +178 -0
jarvis/jarvis_c2rust/transpiler_context.py +122 -0
jarvis/jarvis_c2rust/transpiler_executor.py +516 -0
jarvis/jarvis_c2rust/transpiler_generation.py +278 -0
jarvis/jarvis_c2rust/transpiler_git.py +163 -0
jarvis/jarvis_c2rust/transpiler_mod_utils.py +225 -0
jarvis/jarvis_c2rust/transpiler_modules.py +336 -0
jarvis/jarvis_c2rust/transpiler_planning.py +394 -0
jarvis/jarvis_c2rust/transpiler_review.py +1196 -0
jarvis/jarvis_c2rust/transpiler_symbols.py +176 -0
jarvis/jarvis_c2rust/utils.py +269 -79
jarvis/jarvis_code_agent/after_change.py +233 -0
jarvis/jarvis_code_agent/build_validation_config.py +37 -30
jarvis/jarvis_code_agent/builtin_rules.py +68 -0
jarvis/jarvis_code_agent/code_agent.py +976 -1517
jarvis/jarvis_code_agent/code_agent_build.py +227 -0
jarvis/jarvis_code_agent/code_agent_diff.py +246 -0
jarvis/jarvis_code_agent/code_agent_git.py +525 -0
jarvis/jarvis_code_agent/code_agent_impact.py +177 -0
jarvis/jarvis_code_agent/code_agent_lint.py +283 -0
jarvis/jarvis_code_agent/code_agent_llm.py +159 -0
jarvis/jarvis_code_agent/code_agent_postprocess.py +105 -0
jarvis/jarvis_code_agent/code_agent_prompts.py +46 -0
jarvis/jarvis_code_agent/code_agent_rules.py +305 -0
jarvis/jarvis_code_agent/code_analyzer/__init__.py +52 -48
jarvis/jarvis_code_agent/code_analyzer/base_language.py +12 -10
jarvis/jarvis_code_agent/code_analyzer/build_validator/__init__.py +12 -11
jarvis/jarvis_code_agent/code_analyzer/build_validator/base.py +16 -12
jarvis/jarvis_code_agent/code_analyzer/build_validator/cmake.py +26 -17
jarvis/jarvis_code_agent/code_analyzer/build_validator/detector.py +558 -104
jarvis/jarvis_code_agent/code_analyzer/build_validator/fallback.py +27 -16
jarvis/jarvis_code_agent/code_analyzer/build_validator/go.py +22 -18
jarvis/jarvis_code_agent/code_analyzer/build_validator/java_gradle.py +21 -16
jarvis/jarvis_code_agent/code_analyzer/build_validator/java_maven.py +20 -16
jarvis/jarvis_code_agent/code_analyzer/build_validator/makefile.py +27 -16
jarvis/jarvis_code_agent/code_analyzer/build_validator/nodejs.py +47 -23
jarvis/jarvis_code_agent/code_analyzer/build_validator/python.py +71 -37
jarvis/jarvis_code_agent/code_analyzer/build_validator/rust.py +162 -35
jarvis/jarvis_code_agent/code_analyzer/build_validator/validator.py +111 -57
jarvis/jarvis_code_agent/code_analyzer/build_validator.py +18 -12
jarvis/jarvis_code_agent/code_analyzer/context_manager.py +185 -183
jarvis/jarvis_code_agent/code_analyzer/context_recommender.py +2 -1
jarvis/jarvis_code_agent/code_analyzer/dependency_analyzer.py +24 -15
jarvis/jarvis_code_agent/code_analyzer/file_ignore.py +227 -141
jarvis/jarvis_code_agent/code_analyzer/impact_analyzer.py +321 -247
jarvis/jarvis_code_agent/code_analyzer/language_registry.py +37 -29
jarvis/jarvis_code_agent/code_analyzer/language_support.py +21 -13
jarvis/jarvis_code_agent/code_analyzer/languages/__init__.py +15 -9
jarvis/jarvis_code_agent/code_analyzer/languages/c_cpp_language.py +75 -45
jarvis/jarvis_code_agent/code_analyzer/languages/go_language.py +87 -52
jarvis/jarvis_code_agent/code_analyzer/languages/java_language.py +84 -51
jarvis/jarvis_code_agent/code_analyzer/languages/javascript_language.py +94 -64
jarvis/jarvis_code_agent/code_analyzer/languages/python_language.py +109 -71
jarvis/jarvis_code_agent/code_analyzer/languages/rust_language.py +97 -63
jarvis/jarvis_code_agent/code_analyzer/languages/typescript_language.py +103 -69
jarvis/jarvis_code_agent/code_analyzer/llm_context_recommender.py +271 -268
jarvis/jarvis_code_agent/code_analyzer/symbol_extractor.py +76 -64
jarvis/jarvis_code_agent/code_analyzer/tree_sitter_extractor.py +92 -19
jarvis/jarvis_code_agent/diff_visualizer.py +998 -0
jarvis/jarvis_code_agent/lint.py +223 -524
jarvis/jarvis_code_agent/rule_share_manager.py +158 -0
jarvis/jarvis_code_agent/rules/clean_code.md +144 -0
jarvis/jarvis_code_agent/rules/code_review.md +115 -0
jarvis/jarvis_code_agent/rules/documentation.md +165 -0
jarvis/jarvis_code_agent/rules/generate_rules.md +52 -0
jarvis/jarvis_code_agent/rules/performance.md +158 -0
jarvis/jarvis_code_agent/rules/refactoring.md +139 -0
jarvis/jarvis_code_agent/rules/security.md +160 -0
jarvis/jarvis_code_agent/rules/tdd.md +78 -0
jarvis/jarvis_code_agent/test_rules/cpp_test.md +118 -0
jarvis/jarvis_code_agent/test_rules/go_test.md +98 -0
jarvis/jarvis_code_agent/test_rules/java_test.md +99 -0
jarvis/jarvis_code_agent/test_rules/javascript_test.md +113 -0
jarvis/jarvis_code_agent/test_rules/php_test.md +117 -0
jarvis/jarvis_code_agent/test_rules/python_test.md +91 -0
jarvis/jarvis_code_agent/test_rules/ruby_test.md +102 -0
jarvis/jarvis_code_agent/test_rules/rust_test.md +86 -0
jarvis/jarvis_code_agent/utils.py +36 -26
jarvis/jarvis_code_analysis/checklists/loader.py +21 -21
jarvis/jarvis_code_analysis/code_review.py +64 -33
jarvis/jarvis_data/config_schema.json +285 -192
jarvis/jarvis_git_squash/main.py +8 -6
jarvis/jarvis_git_utils/git_commiter.py +53 -76
jarvis/jarvis_mcp/__init__.py +5 -2
jarvis/jarvis_mcp/sse_mcp_client.py +40 -30
jarvis/jarvis_mcp/stdio_mcp_client.py +27 -19
jarvis/jarvis_mcp/streamable_mcp_client.py +35 -26
jarvis/jarvis_memory_organizer/memory_organizer.py +78 -55
jarvis/jarvis_methodology/main.py +48 -39
jarvis/jarvis_multi_agent/__init__.py +56 -23
jarvis/jarvis_multi_agent/main.py +15 -18
jarvis/jarvis_platform/base.py +179 -111
jarvis/jarvis_platform/human.py +27 -16
jarvis/jarvis_platform/kimi.py +52 -45
jarvis/jarvis_platform/openai.py +101 -40
jarvis/jarvis_platform/registry.py +51 -33
jarvis/jarvis_platform/tongyi.py +68 -38
jarvis/jarvis_platform/yuanbao.py +59 -43
jarvis/jarvis_platform_manager/main.py +68 -76
jarvis/jarvis_platform_manager/service.py +24 -14
jarvis/jarvis_rag/README_CONFIG.md +314 -0
jarvis/jarvis_rag/README_DYNAMIC_LOADING.md +311 -0
jarvis/jarvis_rag/README_ONLINE_MODELS.md +230 -0
jarvis/jarvis_rag/__init__.py +57 -4
jarvis/jarvis_rag/cache.py +3 -1
jarvis/jarvis_rag/cli.py +48 -68
jarvis/jarvis_rag/embedding_interface.py +39 -0
jarvis/jarvis_rag/embedding_manager.py +7 -230
jarvis/jarvis_rag/embeddings/__init__.py +41 -0
jarvis/jarvis_rag/embeddings/base.py +114 -0
jarvis/jarvis_rag/embeddings/cohere.py +66 -0
jarvis/jarvis_rag/embeddings/edgefn.py +117 -0
jarvis/jarvis_rag/embeddings/local.py +260 -0
jarvis/jarvis_rag/embeddings/openai.py +62 -0
jarvis/jarvis_rag/embeddings/registry.py +293 -0
jarvis/jarvis_rag/llm_interface.py +8 -6
jarvis/jarvis_rag/query_rewriter.py +8 -9
jarvis/jarvis_rag/rag_pipeline.py +61 -52
jarvis/jarvis_rag/reranker.py +7 -75
jarvis/jarvis_rag/reranker_interface.py +32 -0
jarvis/jarvis_rag/rerankers/__init__.py +41 -0
jarvis/jarvis_rag/rerankers/base.py +109 -0
jarvis/jarvis_rag/rerankers/cohere.py +67 -0
jarvis/jarvis_rag/rerankers/edgefn.py +140 -0
jarvis/jarvis_rag/rerankers/jina.py +79 -0
jarvis/jarvis_rag/rerankers/local.py +89 -0
jarvis/jarvis_rag/rerankers/registry.py +293 -0
jarvis/jarvis_rag/retriever.py +58 -43
jarvis/jarvis_sec/__init__.py +66 -141
jarvis/jarvis_sec/agents.py +21 -17
jarvis/jarvis_sec/analysis.py +80 -33
jarvis/jarvis_sec/checkers/__init__.py +7 -13
jarvis/jarvis_sec/checkers/c_checker.py +356 -164
jarvis/jarvis_sec/checkers/rust_checker.py +47 -29
jarvis/jarvis_sec/cli.py +43 -21
jarvis/jarvis_sec/clustering.py +430 -272
jarvis/jarvis_sec/file_manager.py +99 -55
jarvis/jarvis_sec/parsers.py +9 -6
jarvis/jarvis_sec/prompts.py +4 -3
jarvis/jarvis_sec/report.py +44 -22
jarvis/jarvis_sec/review.py +180 -107
jarvis/jarvis_sec/status.py +50 -41
jarvis/jarvis_sec/types.py +3 -0
jarvis/jarvis_sec/utils.py +160 -83
jarvis/jarvis_sec/verification.py +411 -181
jarvis/jarvis_sec/workflow.py +132 -21
jarvis/jarvis_smart_shell/main.py +28 -41
jarvis/jarvis_stats/cli.py +14 -12
jarvis/jarvis_stats/stats.py +28 -19
jarvis/jarvis_stats/storage.py +14 -8
jarvis/jarvis_stats/visualizer.py +12 -7
jarvis/jarvis_tools/base.py +5 -2
jarvis/jarvis_tools/clear_memory.py +13 -9
jarvis/jarvis_tools/cli/main.py +23 -18
jarvis/jarvis_tools/edit_file.py +572 -873
jarvis/jarvis_tools/execute_script.py +10 -7
jarvis/jarvis_tools/file_analyzer.py +7 -8
jarvis/jarvis_tools/meta_agent.py +287 -0
jarvis/jarvis_tools/methodology.py +5 -3
jarvis/jarvis_tools/read_code.py +305 -1438
jarvis/jarvis_tools/read_symbols.py +50 -17
jarvis/jarvis_tools/read_webpage.py +19 -18
jarvis/jarvis_tools/registry.py +435 -156
jarvis/jarvis_tools/retrieve_memory.py +16 -11
jarvis/jarvis_tools/save_memory.py +8 -6
jarvis/jarvis_tools/search_web.py +31 -31
jarvis/jarvis_tools/sub_agent.py +32 -28
jarvis/jarvis_tools/sub_code_agent.py +44 -60
jarvis/jarvis_tools/task_list_manager.py +1811 -0
jarvis/jarvis_tools/virtual_tty.py +29 -19
jarvis/jarvis_utils/__init__.py +4 -0
jarvis/jarvis_utils/builtin_replace_map.py +2 -1
jarvis/jarvis_utils/clipboard.py +9 -8
jarvis/jarvis_utils/collections.py +331 -0
jarvis/jarvis_utils/config.py +699 -194
jarvis/jarvis_utils/dialogue_recorder.py +294 -0
jarvis/jarvis_utils/embedding.py +6 -3
jarvis/jarvis_utils/file_processors.py +7 -1
jarvis/jarvis_utils/fzf.py +9 -3
jarvis/jarvis_utils/git_utils.py +71 -42
jarvis/jarvis_utils/globals.py +116 -32
jarvis/jarvis_utils/http.py +6 -2
jarvis/jarvis_utils/input.py +318 -83
jarvis/jarvis_utils/jsonnet_compat.py +119 -104
jarvis/jarvis_utils/methodology.py +37 -28
jarvis/jarvis_utils/output.py +201 -44
jarvis/jarvis_utils/utils.py +986 -628
{jarvis_ai_assistant-0.7.16.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/METADATA +49 -33
jarvis_ai_assistant-1.0.2.dist-info/RECORD +304 -0
jarvis/jarvis_code_agent/code_analyzer/structured_code.py +0 -556
jarvis/jarvis_tools/generate_new_tool.py +0 -205
jarvis/jarvis_tools/lsp_client.py +0 -1552
jarvis/jarvis_tools/rewrite_file.py +0 -105
jarvis_ai_assistant-0.7.16.dist-info/RECORD +0 -218
{jarvis_ai_assistant-0.7.16.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/WHEEL +0 -0
{jarvis_ai_assistant-0.7.16.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/entry_points.txt +0 -0
{jarvis_ai_assistant-0.7.16.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/licenses/LICENSE +0 -0
{jarvis_ai_assistant-0.7.16.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/top_level.txt +0 -0

jarvis/jarvis_sec/clustering.py CHANGED Viewed

@@ -1,24 +1,24 @@
 # -*- coding: utf-8 -*-
 """聚类相关模块"""
-from typing import Dict, List, Optional
-from pathlib import Path
 import json
-import typer
-from jarvis.jarvis_sec.prompts import get_cluster_summary_prompt
+from pathlib import Path
+from typing import Dict
+from typing import List
+from typing import Optional
+from jarvis.jarvis_sec.agents import create_cluster_agent
+from jarvis.jarvis_sec.agents import subscribe_summary_event
+from jarvis.jarvis_sec.file_manager import get_all_clustered_gids
+from jarvis.jarvis_sec.file_manager import get_clusters_file
+from jarvis.jarvis_sec.file_manager import load_clusters
+from jarvis.jarvis_sec.file_manager import save_cluster
+from jarvis.jarvis_sec.file_manager import validate_clustering_completeness
 from jarvis.jarvis_sec.parsers import parse_clusters_from_text
-from jarvis.jarvis_sec.agents import create_cluster_agent, subscribe_summary_event
-from jarvis.jarvis_sec.utils import (
-    group_candidates_by_file,
-)
-from jarvis.jarvis_sec.file_manager import (
-    load_clusters,
-    save_cluster,
-    get_all_clustered_gids,
-    validate_clustering_completeness,
-    get_clusters_file,
-)
+from jarvis.jarvis_sec.prompts import get_cluster_summary_prompt
+from jarvis.jarvis_sec.utils import group_candidates_by_file
+from jarvis.jarvis_utils.output import PrettyOutput
 def load_existing_clusters(
@@ -26,33 +26,36 @@ def load_existing_clusters(
 ) -> tuple[Dict[tuple[str, int], List[Dict]], set, set]:
     """
     读取已有聚类报告以支持断点恢复。
     优先使用新的 clusters.jsonl 文件，如果不存在则回退到旧的 cluster_report.jsonl。
     返回: (_existing_clusters, _completed_cluster_batches, _reviewed_invalid_gids)
     """
     _existing_clusters: Dict[tuple[str, int], List[Dict]] = {}
     _completed_cluster_batches: set = set()
     _reviewed_invalid_gids: set = set()  # 已复核的无效聚类的 gids
     try:
         # 优先使用新的 clusters.jsonl 文件
         clusters = load_clusters(sec_dir)
         if clusters:
             # 从新的 clusters.jsonl 加载
             for cluster in clusters:
                 f_name = str(cluster.get("file") or "")
                 bidx = int(cluster.get("batch_index", 1) or 1)
                 _existing_clusters.setdefault((f_name, bidx), []).append(cluster)
                 # 从分析结果文件中读取已复核的无效聚类
                 # 如果聚类是无效的，且其gids都在分析结果中被标记为误报，则认为已复核
                 if cluster.get("is_invalid", False):
                     gids_list = cluster.get("gids", [])
                     if isinstance(gids_list, list):
                         # 检查这些gid是否都在分析结果中被标记为误报
-                        from jarvis.jarvis_sec.file_manager import get_false_positive_gids
+                        from jarvis.jarvis_sec.file_manager import (
+                            get_false_positive_gids,
+                        )
                         false_positive_gids = get_false_positive_gids(sec_dir)
                         all_false_positive = all(
                             int(gid_val) in false_positive_gids
@@ -72,7 +75,7 @@ def load_existing_clusters(
         _existing_clusters = {}
         _completed_cluster_batches = set()
         _reviewed_invalid_gids = set()
     return _existing_clusters, _completed_cluster_batches, _reviewed_invalid_gids
@@ -83,7 +86,7 @@ def restore_clusters_from_checkpoint(
 ) -> tuple[List[List[Dict]], List[Dict], List[Dict], set]:
     """
     从断点恢复聚类结果。
     返回: (cluster_batches, cluster_records, invalid_clusters_for_review, clustered_gids)
     """
     # 1. 收集所有候选的 gid
@@ -98,15 +101,17 @@ def restore_clusters_from_checkpoint(
                     gid_to_candidate[_gid] = it
             except Exception:
                 pass
     # 2. 从 cluster_report.jsonl 恢复所有聚类结果
-    clustered_gids = set()  # 已聚类的 gid（包括有效和无效的，因为无效的也需要进入复核阶段）
+    clustered_gids = (
+        set()
+    )  # 已聚类的 gid（包括有效和无效的，因为无效的也需要进入复核阶段）
     invalid_clusters_for_review: List[Dict] = []  # 无效聚类列表（从断点恢复）
     cluster_batches: List[List[Dict]] = []
     cluster_records: List[Dict] = []
     skipped_reviewed_count = 0  # 已复核的无效聚类数量（跳过）
     missing_gids_in_restore = set()  # 记录恢复时无法匹配的gid（用于诊断）
     # 首先，从所有聚类记录中收集所有已聚类的 gid（无论是否在当前候选集中）
     # 这样可以确保即使匹配失败，只要 gid 在 clusters.jsonl 中且在当前候选集中，就会被计入 clustered_gids
     all_clustered_gids_from_file = set()
@@ -121,13 +126,13 @@ def restore_clusters_from_checkpoint(
                             all_clustered_gids_from_file.add(_gid_int)
                     except Exception:
                         pass
     # 对于所有在 clusters.jsonl 中记录的 gid，如果它们也在当前候选集中，就计入 clustered_gids
     # 这样可以避免因为匹配失败而导致的遗漏
     for _gid_int in all_clustered_gids_from_file:
         if _gid_int in all_candidate_gids_in_clustering:
             clustered_gids.add(_gid_int)
     # 然后，尝试恢复具体的聚类信息（用于恢复 cluster_batches 和 invalid_clusters_for_review）
     for (_file_key, _batch_idx), cluster_recs in _existing_clusters.items():
         for rec in cluster_recs:
@@ -168,7 +173,7 @@ def restore_clusters_from_checkpoint(
                                             pass
                                     if found_candidate:
                                         break
                                 if found_candidate:
                                     # 找到了对应的候选，添加到members中
                                     found_candidate["verify"] = verification
@@ -186,7 +191,7 @@ def restore_clusters_from_checkpoint(
                                 pass
                 except Exception:
                     pass
             # 只有当至少有一个gid在当前候选集中时，才恢复这个聚类
             # 如果所有gid都不在当前候选集中，说明这些gid对应的候选在当前运行中不存在
             # 这种情况下，不应该恢复这个聚类，因为这些gid不在当前运行中
@@ -198,49 +203,63 @@ def restore_clusters_from_checkpoint(
                     cluster_gids_int = set()
                     for gid_val in cluster_gids:
                         try:
+                            if gid_val is None:
+                                continue
                             gid_int = int(gid_val)
                             if gid_int >= 1:
                                 cluster_gids_int.add(gid_int)
                         except Exception:
                             pass
                     # 检查所有 gid 是否都已被复核过
-                    all_reviewed = cluster_gids_int and cluster_gids_int.issubset(_reviewed_invalid_gids)
+                    all_reviewed = cluster_gids_int and cluster_gids_int.issubset(
+                        _reviewed_invalid_gids
+                    )
                     if not all_reviewed:
                         # 如果还有未复核的 gid，收集到复核列表
-                        invalid_clusters_for_review.append({
-                            "file": _file_key,
-                            "batch_index": _batch_idx,
-                            "gids": cluster_gids,
-                            "verification": verification,
-                            "invalid_reason": str(rec.get("invalid_reason", "")).strip(),
-                            "members": members,  # 保存候选信息，用于复核后可能重新加入验证
-                            "count": len(members),
-                        })
+                        invalid_clusters_for_review.append(
+                            {
+                                "file": _file_key,
+                                "batch_index": _batch_idx,
+                                "gids": cluster_gids,
+                                "verification": verification,
+                                "invalid_reason": str(
+                                    rec.get("invalid_reason", "")
+                                ).strip(),
+                                "members": members,  # 保存候选信息，用于复核后可能重新加入验证
+                                "count": len(members),
+                            }
+                        )
                     else:
                         # 如果所有 gid 都已被复核过，则跳过（不加入复核列表）
                         skipped_reviewed_count += 1
                 else:
                     # 有效聚类：恢复到 cluster_batches
                     cluster_batches.append(members)
-                    cluster_records.append({
-                        "file": _file_key,
-                        "verification": verification,
-                        "gids": [m.get("gid") for m in members],
-                        "count": len(members),
-                        "batch_index": _batch_idx,
-                        "is_invalid": False,
-                    })
+                    cluster_records.append(
+                        {
+                            "file": _file_key,
+                            "verification": verification,
+                            "gids": [m.get("gid") for m in members],
+                            "count": len(members),
+                            "batch_index": _batch_idx,
+                            "is_invalid": False,
+                        }
+                    )
     # 输出统计信息
     if _reviewed_invalid_gids:
         try:
-            typer.secho(f"[jarvis-sec] 断点恢复：发现 {len(_reviewed_invalid_gids)} 个已复核的无效聚类 gids", fg=typer.colors.BLUE)
+            PrettyOutput.auto_print(
+                f"[jarvis-sec] 断点恢复：发现 {len(_reviewed_invalid_gids)} 个已复核的无效聚类 gids",
+            )
         except Exception:
             pass
     if skipped_reviewed_count > 0:
         try:
-            typer.secho(f"[jarvis-sec] 断点恢复：跳过 {skipped_reviewed_count} 个已复核的无效聚类", fg=typer.colors.BLUE)
+            PrettyOutput.auto_print(
+                f"[jarvis-sec] 断点恢复：跳过 {skipped_reviewed_count} 个已复核的无效聚类",
+            )
         except Exception:
             pass
     if missing_gids_in_restore:
@@ -251,31 +270,42 @@ def restore_clusters_from_checkpoint(
         try:
             if missing_count <= 20:
                 missing_list = sorted(list(missing_gids_in_restore))
-                typer.secho(f"[jarvis-sec] 断点恢复诊断：发现 {missing_count} 个gid在当前候选集中但无法匹配（可能存在数据不一致）: {missing_list}", fg=typer.colors.YELLOW)
+                PrettyOutput.auto_print(
+                    f"[jarvis-sec] 断点恢复诊断：发现 {missing_count} 个gid在当前候选集中但无法匹配（可能存在数据不一致）: {missing_list}",
+                )
             else:
                 missing_list = sorted(list(missing_gids_in_restore))
                 display_list = missing_list[:10] + ["..."] + missing_list[-10:]
-                typer.secho(f"[jarvis-sec] 断点恢复诊断：发现 {missing_count} 个gid在当前候选集中但无法匹配（可能存在数据不一致）: {display_list}", fg=typer.colors.YELLOW)
+                PrettyOutput.auto_print(
+                    f"[jarvis-sec] 断点恢复诊断：发现 {missing_count} 个gid在当前候选集中但无法匹配（可能存在数据不一致）: {display_list}",
+                )
         except Exception:
             pass
     return cluster_batches, cluster_records, invalid_clusters_for_review, clustered_gids
-def create_cluster_snapshot_writer(sec_dir: Path, cluster_records: List[Dict], compact_candidates: List[Dict], _progress_append):
+def create_cluster_snapshot_writer(
+    sec_dir: Path,
+    cluster_records: List[Dict],
+    compact_candidates: List[Dict],
+    _progress_append,
+):
     """创建聚类快照写入函数"""
     def _write_cluster_batch_snapshot(batch_records: List[Dict]):
         """写入单个批次的聚类结果，支持增量保存"""
         try:
             # 按 (file, batch_index) 分组，为每个分组内的记录生成唯一的 cluster_index
             from collections import defaultdict
             records_by_key = defaultdict(list)
             for record in batch_records:
                 file_name = str(record.get("file", ""))
                 batch_index = int(record.get("batch_index", 0))
                 key = (file_name, batch_index)
                 records_by_key[key].append(record)
             # 为每个分组内的记录生成 cluster_index
             for (file_name, batch_index), records in records_by_key.items():
                 for local_idx, record in enumerate(records):
@@ -285,9 +315,9 @@ def create_cluster_snapshot_writer(sec_dir: Path, cluster_records: List[Dict], c
                         cluster_index = local_idx
                     else:
                         cluster_index = int(cluster_index)
                     cluster_id = f"{file_name}|{batch_index}|{cluster_index}"
                     # 转换为新的格式
                     cluster = {
                         "cluster_id": cluster_id,
@@ -299,12 +329,12 @@ def create_cluster_snapshot_writer(sec_dir: Path, cluster_records: List[Dict], c
                         "is_invalid": record.get("is_invalid", False),
                         "invalid_reason": str(record.get("invalid_reason", "")).strip(),
                     }
                     # 使用新的文件管理器保存
                     save_cluster(sec_dir, cluster)
         except Exception:
             pass
     def _write_cluster_report_snapshot():
         """写入聚类报告快照"""
         try:
@@ -314,7 +344,7 @@ def create_cluster_snapshot_writer(sec_dir: Path, cluster_records: List[Dict], c
                 batch_index = int(record.get("batch_index", 0))
                 cluster_index = idx  # 使用索引作为 cluster_index
                 cluster_id = f"{file_name}|{batch_index}|{cluster_index}"
                 # 转换为新的格式
                 cluster = {
                     "cluster_id": cluster_id,
@@ -326,7 +356,7 @@ def create_cluster_snapshot_writer(sec_dir: Path, cluster_records: List[Dict], c
                     "is_invalid": record.get("is_invalid", False),
                     "invalid_reason": str(record.get("invalid_reason", "")).strip(),
                 }
                 # 使用新的文件管理器保存
                 save_cluster(sec_dir, cluster)
@@ -340,7 +370,7 @@ def create_cluster_snapshot_writer(sec_dir: Path, cluster_records: List[Dict], c
             )
         except Exception:
             pass
     return _write_cluster_batch_snapshot, _write_cluster_report_snapshot
@@ -358,7 +388,9 @@ def collect_candidate_gids(file_groups: Dict[str, List[Dict]]) -> set:
     return all_gids
-def collect_clustered_gids(cluster_batches: List[List[Dict]], invalid_clusters_for_review: List[Dict]) -> set:
+def collect_clustered_gids(
+    cluster_batches: List[List[Dict]], invalid_clusters_for_review: List[Dict]
+) -> set:
     """收集所有已聚类的 gid"""
     all_clustered_gids = set()
     for batch in cluster_batches:
@@ -394,20 +426,21 @@ def filter_single_gid_clusters(
 ) -> List[List[Dict]]:
     """
     过滤掉单独聚类的批次（只包含1个gid的批次），避免分析工作量激增。
     这些单独聚类通常是之前为遗漏的gid自动创建的，现在不再需要。
     """
     filtered_batches = []
     removed_count = 0
     removed_gids = set()
     # 读取已分析的gid（从analysis.jsonl）
     from jarvis.jarvis_sec.file_manager import get_all_analyzed_gids
     processed_gids = get_all_analyzed_gids(sec_dir)
     # 读取clusters.jsonl中的所有gid
     cluster_report_gids = get_all_clustered_gids(sec_dir)
     for batch in cluster_batches:
         # 检查批次大小
         if len(batch) == 1:
@@ -420,17 +453,21 @@ def filter_single_gid_clusters(
                     if gid in processed_gids:
                         removed_count += 1
                         removed_gids.add(gid)
-                        _progress_append({
-                            "event": "single_cluster_removed",
-                            "gid": gid,
-                            "reason": "already_analyzed",
-                        })
+                        _progress_append(
+                            {
+                                "event": "single_cluster_removed",
+                                "gid": gid,
+                                "reason": "already_analyzed",
+                            }
+                        )
                         continue
                     # 检查verification字段，如果是默认的"验证候选 X 的安全风险"，说明是自动创建的单独聚类
                     verification = str(single_item.get("verify", "")).strip()
-                    is_auto_created = verification.startswith("验证候选 ") and verification.endswith(" 的安全风险")
+                    is_auto_created = verification.startswith(
+                        "验证候选 "
+                    ) and verification.endswith(" 的安全风险")
                     if is_auto_created:
                         # 这是自动创建的单独聚类
                         # 如果gid在clusters.jsonl中有记录，说明已经聚类过了，可以安全移除
@@ -438,18 +475,22 @@ def filter_single_gid_clusters(
                         if gid in cluster_report_gids:
                             removed_count += 1
                             removed_gids.add(gid)
-                            _progress_append({
-                                "event": "single_cluster_removed",
-                                "gid": gid,
-                                "reason": "auto_created_and_in_clusters",
-                            })
+                            _progress_append(
+                                {
+                                    "event": "single_cluster_removed",
+                                    "gid": gid,
+                                    "reason": "auto_created_and_in_clusters",
+                                }
+                            )
                             continue
                         else:
                             # 自动创建的单独聚类，但不在clusters.jsonl中，也不在analysis.jsonl中
                             # 说明需要分析，保留它（避免遗漏告警）
                             # 但给出警告，因为这种情况不应该发生
                             try:
-                                typer.secho(f"[jarvis-sec] 警告：gid={gid}是自动创建的单独聚类，但不在clusters.jsonl中，保留以避免遗漏告警", fg=typer.colors.YELLOW)
+                                PrettyOutput.auto_print(
+                                    f"[jarvis-sec] 警告：gid={gid}是自动创建的单独聚类，但不在clusters.jsonl中，保留以避免遗漏告警",
+                                )
                             except Exception:
                                 pass
                     else:
@@ -458,23 +499,33 @@ def filter_single_gid_clusters(
                         pass
             except Exception:
                 pass
         # 保留这个批次（不是单独聚类，或者单独聚类但需要保留）
         filtered_batches.append(batch)
     if removed_count > 0:
         try:
             if len(removed_gids) <= 20:
-                typer.secho(f"[jarvis-sec] 已移除 {removed_count} 个单独聚类批次（共{len(removed_gids)}个gid），避免分析工作量激增", fg=typer.colors.GREEN)
-                typer.secho(f"[jarvis-sec] 移除的gid: {sorted(list(removed_gids))}", fg=typer.colors.GREEN)
+                PrettyOutput.auto_print(
+                    f"[jarvis-sec] 已移除 {removed_count} 个单独聚类批次（共{len(removed_gids)}个gid），避免分析工作量激增",
+                )
+                PrettyOutput.auto_print(
+                    f"[jarvis-sec] 移除的gid: {sorted(list(removed_gids))}",
+                )
             else:
                 removed_gids_list = sorted(list(removed_gids))
-                display_list = removed_gids_list[:10] + ["..."] + removed_gids_list[-10:]
-                typer.secho(f"[jarvis-sec] 已移除 {removed_count} 个单独聚类批次（共{len(removed_gids)}个gid），避免分析工作量激增", fg=typer.colors.GREEN)
-                typer.secho(f"[jarvis-sec] 移除的gid（示例）: {display_list}", fg=typer.colors.GREEN)
+                display_list = (
+                    removed_gids_list[:10] + ["..."] + removed_gids_list[-10:]
+                )
+                PrettyOutput.auto_print(
+                    f"[jarvis-sec] 已移除 {removed_count} 个单独聚类批次（共{len(removed_gids)}个gid），避免分析工作量激增",
+                )
+                PrettyOutput.auto_print(
+                    f"[jarvis-sec] 移除的gid（示例）: {display_list}",
+                )
         except Exception:
             pass
     return filtered_batches
@@ -512,65 +563,76 @@ def handle_single_alert_file(
         }
     )
     current_batch_records = [
-        rec for rec in cluster_records
+        rec
+        for rec in cluster_records
         if rec.get("file") == file and rec.get("batch_index") == 1
     ]
     if current_batch_records:
         _write_cluster_batch_snapshot(current_batch_records)
-    typer.secho(f"[jarvis-sec] 文件 {file} 仅有一个告警（gid={single_gid}），跳过聚类直接写入", fg=typer.colors.BLUE)
+    PrettyOutput.auto_print(
+        f"[jarvis-sec] 文件 {file} 仅有一个告警（gid={single_gid}），跳过聚类直接写入",
+    )
 def validate_cluster_format(cluster_items: List[Dict]) -> tuple[bool, List[str]]:
     """验证聚类结果的格式，返回(是否有效, 错误详情列表)"""
     if not isinstance(cluster_items, list) or not cluster_items:
         return False, ["结果不是数组或数组为空"]
     error_details = []
     for idx, it in enumerate(cluster_items):
         if not isinstance(it, dict):
             error_details.append(f"元素{idx}不是字典")
             return False, error_details
         vals = it.get("gids", [])
-        if not isinstance(it.get("verification", ""), str) or not isinstance(vals, list):
+        if not isinstance(it.get("verification", ""), str) or not isinstance(
+            vals, list
+        ):
             error_details.append(f"元素{idx}的verification或gids格式错误")
             return False, error_details
         # 校验 gids 列表中的每个元素是否都是有效的整数
         if isinstance(vals, list):
             for gid_idx, gid_val in enumerate(vals):
                 try:
                     gid_int = int(gid_val)
                     if gid_int < 1:
-                        error_details.append(f"元素{idx}的gids[{gid_idx}]不是有效的正整数（值为{gid_val}）")
+                        error_details.append(
+                            f"元素{idx}的gids[{gid_idx}]不是有效的正整数（值为{gid_val}）"
+                        )
                         return False, error_details
                 except (ValueError, TypeError):
-                    error_details.append(f"元素{idx}的gids[{gid_idx}]不是有效的整数（值为{gid_val}，类型为{type(gid_val).__name__}）")
+                    error_details.append(
+                        f"元素{idx}的gids[{gid_idx}]不是有效的整数（值为{gid_val}，类型为{type(gid_val).__name__}）"
+                    )
                     return False, error_details
         # 校验 is_invalid 字段（必填）
         if "is_invalid" not in it:
             error_details.append(f"元素{idx}缺少is_invalid字段（必填）")
             return False, error_details
         is_invalid_val = it.get("is_invalid")
         if not isinstance(is_invalid_val, bool):
             error_details.append(f"元素{idx}的is_invalid不是布尔值")
             return False, error_details
         # 如果is_invalid为true，必须提供invalid_reason
         if is_invalid_val is True:
             invalid_reason = it.get("invalid_reason", "")
             if not isinstance(invalid_reason, str) or not invalid_reason.strip():
-                error_details.append(f"元素{idx}的is_invalid为true但缺少invalid_reason字段或理由为空（必填）")
+                error_details.append(
+                    f"元素{idx}的is_invalid为true但缺少invalid_reason字段或理由为空（必填）"
+                )
                 return False, error_details
     return True, []
 def extract_classified_gids(cluster_items: List[Dict]) -> set:
     """从聚类结果中提取所有已分类的gid
     注意：此函数假设格式验证已经通过，所有gid都是有效的整数。
     如果遇到格式错误的gid，会记录警告但不会抛出异常（因为格式验证应该已经捕获了这些问题）。
     """
@@ -586,7 +648,9 @@ def extract_classified_gids(cluster_items: List[Dict]) -> set:
                 except (ValueError, TypeError):
                     # 理论上不应该到达这里（格式验证应该已经捕获），但如果到达了，记录警告
                     try:
-                        typer.secho(f"[jarvis-sec] 警告：在提取gid时遇到格式错误（值={x}，类型={type(x).__name__}），这不应该发生（格式验证应该已捕获）", fg=typer.colors.YELLOW)
+                        PrettyOutput.auto_print(
+                            f"[jarvis-sec] 警告：在提取gid时遇到格式错误（值={x}，类型={type(x).__name__}），这不应该发生（格式验证应该已捕获）",
+                        )
                     except Exception:
                         pass
                     continue
@@ -608,9 +672,14 @@ def build_cluster_retry_task(
     if missing_gids:
         missing_gids_list = sorted(list(missing_gids))
         missing_count = len(missing_gids)
-        retry_task += f"\n\n**遗漏的gid（共{missing_count}个，必须被分类）：**\n" + ", ".join(str(gid) for gid in missing_gids_list)
+        retry_task += (
+            f"\n\n**遗漏的gid（共{missing_count}个，必须被分类）：**\n"
+            + ", ".join(str(gid) for gid in missing_gids_list)
+        )
     if error_details:
-        retry_task += "\n\n**格式错误：**\n" + "\n".join(f"- {detail}" for detail in error_details)
+        retry_task += "\n\n**格式错误：**\n" + "\n".join(
+            f"- {detail}" for detail in error_details
+        )
     return retry_task
@@ -621,11 +690,17 @@ def build_cluster_error_guidance(
     """构建聚类错误指导信息"""
     error_guidance = ""
     if error_details:
-        error_guidance = "\n\n**格式错误详情（请根据以下错误修复输出格式）：**\n" + "\n".join(f"- {detail}" for detail in error_details)
+        error_guidance = (
+            "\n\n**格式错误详情（请根据以下错误修复输出格式）：**\n"
+            + "\n".join(f"- {detail}" for detail in error_details)
+        )
     if missing_gids:
         missing_gids_list = sorted(list(missing_gids))
         missing_count = len(missing_gids)
-        error_guidance += f"\n\n**完整性错误：遗漏了 {missing_count} 个 gid，这些 gid 必须被分类：**\n" + ", ".join(str(gid) for gid in missing_gids_list)
+        error_guidance += (
+            f"\n\n**完整性错误：遗漏了 {missing_count} 个 gid，这些 gid 必须被分类：**\n"
+            + ", ".join(str(gid) for gid in missing_gids_list)
+        )
     return error_guidance
@@ -643,11 +718,13 @@ def run_cluster_agent_direct_model(
     error_guidance = build_cluster_error_guidance(error_details, missing_gids)
     full_prompt = f"{retry_task}{error_guidance}\n\n{cluster_summary_prompt}"
     try:
-        response = cluster_agent.model.chat_until_success(full_prompt)  # type: ignore
+        response = cluster_agent.model.chat_until_success(full_prompt)
         _cluster_summary["text"] = response
     except Exception as e:
         try:
-            typer.secho(f"[jarvis-sec] 直接模型调用失败: {e}，回退到 run()", fg=typer.colors.YELLOW)
+            PrettyOutput.auto_print(
+                f"[jarvis-sec] 直接模型调用失败: {e}，回退到 run()",
+            )
         except Exception:
             pass
         cluster_agent.run(cluster_task)
@@ -661,12 +738,14 @@ def validate_cluster_result(
     """验证聚类结果格式"""
     if parse_error:
         error_details = [f"JSON解析失败: {parse_error}"]
-        typer.secho(f"[jarvis-sec] JSON解析失败: {parse_error}", fg=typer.colors.YELLOW)
+        PrettyOutput.auto_print(f"[jarvis-sec] JSON解析失败: {parse_error}")
         return False, error_details
     else:
-        valid, error_details = validate_cluster_format(cluster_items)
+        valid, error_details = validate_cluster_format(cluster_items or [])
         if not valid:
-            typer.secho(f"[jarvis-sec] 聚类结果格式无效（{'; '.join(error_details)}），重试第 {attempt} 次（使用直接模型调用）", fg=typer.colors.YELLOW)
+            PrettyOutput.auto_print(
+                f"[jarvis-sec] 聚类结果格式无效（{'; '.join(error_details)}），重试第 {attempt} 次（使用直接模型调用）",
+            )
         return valid, error_details
@@ -679,12 +758,16 @@ def check_cluster_completeness(
     classified_gids = extract_classified_gids(cluster_items)
     missing_gids = input_gids - classified_gids
     if not missing_gids:
-        typer.secho(f"[jarvis-sec] 聚类完整性校验通过，所有gid已分类（共尝试 {attempt} 次）", fg=typer.colors.GREEN)
+        PrettyOutput.auto_print(
+            f"[jarvis-sec] 聚类完整性校验通过，所有gid已分类（共尝试 {attempt} 次）",
+        )
         return True, set()
     else:
         missing_gids_list = sorted(list(missing_gids))
         missing_count = len(missing_gids)
-        typer.secho(f"[jarvis-sec] 聚类完整性校验失败：遗漏的gid: {missing_gids_list}（{missing_count}个），重试第 {attempt} 次（使用直接模型调用）", fg=typer.colors.YELLOW)
+        PrettyOutput.auto_print(
+            f"[jarvis-sec] 聚类完整性校验失败：遗漏的gid: {missing_gids_list}（{missing_count}个），重试第 {attempt} 次（使用直接模型调用）",
+        )
         return False, missing_gids
@@ -704,13 +787,13 @@ def run_cluster_agent_with_retry(
     _attempt = 0
     use_direct_model = False
     error_details: List[str] = []
-    missing_gids = set()
+    missing_gids: set[str] = set()
     consecutive_failures = 0  # 连续失败次数
     while True:
         _attempt += 1
         _cluster_summary["text"] = ""
         if use_direct_model:
             run_cluster_agent_direct_model(
                 cluster_agent,
@@ -724,26 +807,32 @@ def run_cluster_agent_with_retry(
         else:
             # 第一次使用 run()，让 Agent 完整运行（可能使用工具）
             cluster_agent.run(cluster_task)
         cluster_summary_text = _cluster_summary.get("text", "")
         # 调试：如果解析失败，输出摘要文本的前500个字符用于调试
         cluster_items, parse_error = parse_clusters_from_text(cluster_summary_text)
         # 如果解析失败且是第一次尝试，输出调试信息
         if parse_error and _attempt == 1:
             preview = cluster_summary_text[:500] if cluster_summary_text else "(空)"
             try:
-                typer.secho(f"[jarvis-sec] 调试：摘要文本预览（前500字符）: {preview}", fg=typer.colors.CYAN, err=True)
+                PrettyOutput.auto_print(
+                    f"[jarvis-sec] 调试：摘要文本预览（前500字符）: {preview}",
+                )
             except Exception:
                 pass
         # 校验结构
-        valid, error_details = validate_cluster_result(cluster_items, parse_error, _attempt)
+        valid, error_details = validate_cluster_result(
+            cluster_items, parse_error, _attempt
+        )
         # 完整性校验：检查所有输入的gid是否都被分类
         missing_gids = set()
         if valid and cluster_items:
-            is_complete, missing_gids = check_cluster_completeness(cluster_items, input_gids, _attempt)
+            is_complete, missing_gids = check_cluster_completeness(
+                cluster_items, input_gids, _attempt
+            )
             if is_complete:
                 return cluster_items, None, False
             else:
@@ -752,15 +841,17 @@ def run_cluster_agent_with_retry(
                 consecutive_failures += 1
         else:
             consecutive_failures += 1
         # 如果连续失败5次，且提供了创建agent的函数，则返回需要重新创建agent的标志
         if not valid and consecutive_failures >= 5 and create_agent_func is not None:
             try:
-                typer.secho(f"[jarvis-sec] 连续失败 {consecutive_failures} 次，需要重新创建agent", fg=typer.colors.YELLOW)
+                PrettyOutput.auto_print(
+                    f"[jarvis-sec] 连续失败 {consecutive_failures} 次，需要重新创建agent",
+                )
             except Exception:
                 pass
             return None, parse_error or "连续失败5次", True
         if not valid:
             use_direct_model = True
             cluster_items = None
@@ -788,11 +879,11 @@ def process_cluster_results(
                 pass
     except Exception:
         gid_to_item = {}
     _merged_count = 0
     _invalid_count = 0
     classified_gids_final = set()
     for cl in cluster_items:
         verification = str(cl.get("verification", "")).strip()
         raw_gids = cl.get("gids", [])
@@ -807,61 +898,71 @@ def process_cluster_results(
                         classified_gids_final.add(xi)
                 except Exception:
                     pass
         members: List[Dict] = []
         for k in norm_keys:
-            it = gid_to_item.get(k)
-            if it:
-                it["verify"] = verification
-                members.append(it)
+            item = gid_to_item.get(k)
+            if item is not None:
+                item["verify"] = verification
+                members.append(item)
         # 如果标记为无效，收集到复核列表
         if is_invalid:
             _invalid_count += 1
             invalid_gids = [m.get("gid") for m in members]
             invalid_reason = str(cl.get("invalid_reason", "")).strip()
             try:
-                typer.secho(f"[jarvis-sec] 聚类阶段判定为无效（gids={invalid_gids}），将提交复核Agent验证", fg=typer.colors.BLUE)
+                PrettyOutput.auto_print(
+                    f"[jarvis-sec] 聚类阶段判定为无效（gids={invalid_gids}），将提交复核Agent验证",
+                )
             except Exception:
                 pass
-            invalid_clusters_for_review.append({
-                "file": file,
-                "batch_index": chunk_idx,
-                "gids": invalid_gids,
-                "verification": verification,
-                "invalid_reason": invalid_reason,
-                "members": members,
-                "count": len(members),
-            })
-            _progress_append({
-                "event": "cluster_invalid",
-                "file": file,
-                "batch_index": chunk_idx,
-                "gids": invalid_gids,
-                "verification": verification,
-                "count": len(members),
-            })
-            cluster_records.append({
-                "file": file,
-                "verification": verification,
-                "gids": invalid_gids,
-                "count": len(members),
-                "batch_index": chunk_idx,
-                "is_invalid": True,
-                "invalid_reason": invalid_reason,
-            })
+            invalid_clusters_for_review.append(
+                {
+                    "file": file,
+                    "batch_index": chunk_idx,
+                    "gids": invalid_gids,
+                    "verification": verification,
+                    "invalid_reason": invalid_reason,
+                    "members": members,
+                    "count": len(members),
+                }
+            )
+            _progress_append(
+                {
+                    "event": "cluster_invalid",
+                    "file": file,
+                    "batch_index": chunk_idx,
+                    "gids": invalid_gids,
+                    "verification": verification,
+                    "count": len(members),
+                }
+            )
+            cluster_records.append(
+                {
+                    "file": file,
+                    "verification": verification,
+                    "gids": invalid_gids,
+                    "count": len(members),
+                    "batch_index": chunk_idx,
+                    "is_invalid": True,
+                    "invalid_reason": invalid_reason,
+                }
+            )
         elif members:
             _merged_count += 1
             cluster_batches.append(members)
-            cluster_records.append({
-                "file": file,
-                "verification": verification,
-                "gids": [m.get("gid") for m in members],
-                "count": len(members),
-                "batch_index": chunk_idx,
-                "is_invalid": False,
-            })
+            cluster_records.append(
+                {
+                    "file": file,
+                    "verification": verification,
+                    "gids": [m.get("gid") for m in members],
+                    "count": len(members),
+                    "batch_index": chunk_idx,
+                    "is_invalid": False,
+                }
+            )
     return _merged_count, _invalid_count
@@ -881,14 +982,16 @@ def supplement_missing_gids(
             default_verification = f"验证候选 {missing_gid} 的安全风险"
             missing_item["verify"] = default_verification
             cluster_batches.append([missing_item])
-            cluster_records.append({
-                "file": file,
-                "verification": default_verification,
-                "gids": [missing_gid],
-                "count": 1,
-                "batch_index": chunk_idx,
-                "note": "完整性校验补充的遗漏gid",
-            })
+            cluster_records.append(
+                {
+                    "file": file,
+                    "verification": default_verification,
+                    "gids": [missing_gid],
+                    "count": 1,
+                    "batch_index": chunk_idx,
+                    "note": "完整性校验补充的遗漏gid",
+                }
+            )
             supplemented_count += 1
     return supplemented_count
@@ -958,35 +1061,39 @@ def process_cluster_chunk(
     """处理单个聚类批次"""
     if not chunk:
         return
     pending_in_file_with_ids = list(chunk)
     # 记录聚类批次开始
-    _progress_append({
-        "event": "cluster_status",
-        "status": "running",
-        "file": file,
-        "batch_index": chunk_idx,
-        "total_in_batch": len(pending_in_file_with_ids),
-    })
+    _progress_append(
+        {
+            "event": "cluster_status",
+            "status": "running",
+            "file": file,
+            "batch_index": chunk_idx,
+            "total_in_batch": len(pending_in_file_with_ids),
+        }
+    )
     # 创建聚类Agent
-    cluster_agent = create_cluster_agent(file, chunk_idx, llm_group, force_save_memory=force_save_memory)
+    cluster_agent = create_cluster_agent(
+        file, chunk_idx, llm_group, force_save_memory=force_save_memory
+    )
     # 构建任务上下文
     cluster_task = build_cluster_task(pending_in_file_with_ids, entry_path, file, langs)
     # 提取输入gid
     input_gids = extract_input_gids(pending_in_file_with_ids)
     # 运行聚类Agent（支持重新创建agent，不限次数）
     cluster_summary_prompt = get_cluster_summary_prompt()
     recreate_count = 0
     while True:
         # 订阅摘要事件（每次重新创建agent后需要重新订阅）
         cluster_summary = subscribe_summary_event(cluster_agent)
         cluster_items, parse_error, need_recreate = run_cluster_agent_with_retry(
             cluster_agent,
             cluster_task,
@@ -994,28 +1101,34 @@ def process_cluster_chunk(
             input_gids,
             file,
             cluster_summary,
-            create_agent_func=lambda: create_cluster_agent(file, chunk_idx, llm_group, force_save_memory=force_save_memory),
+            create_agent_func=lambda: create_cluster_agent(
+                file, chunk_idx, llm_group, force_save_memory=force_save_memory
+            ),
         )
         # 如果不需要重新创建agent，退出循环
         if not need_recreate:
             break
         # 需要重新创建agent（不限次数）
         recreate_count += 1
         try:
-            typer.secho(f"[jarvis-sec] 重新创建聚类Agent（第 {recreate_count} 次）", fg=typer.colors.MAGENTA)
+            PrettyOutput.auto_print(
+                f"[jarvis-sec] 重新创建聚类Agent（第 {recreate_count} 次）",
+            )
         except Exception:
             pass
-        cluster_agent = create_cluster_agent(file, chunk_idx, llm_group, force_save_memory=force_save_memory)
+        cluster_agent = create_cluster_agent(
+            file, chunk_idx, llm_group, force_save_memory=force_save_memory
+        )
     # 处理聚类结果
     _merged_count = 0
     _invalid_count = 0
     if isinstance(cluster_items, list) and cluster_items:
         gid_to_item = build_gid_to_item_mapping(pending_in_file_with_ids)
         _merged_count, _invalid_count = process_cluster_results(
             cluster_items,
             pending_in_file_with_ids,
@@ -1026,11 +1139,13 @@ def process_cluster_chunk(
             invalid_clusters_for_review,
             _progress_append,
         )
         classified_gids_final = extract_classified_gids(cluster_items)
         missing_gids_final = input_gids - classified_gids_final
         if missing_gids_final:
-            typer.secho(f"[jarvis-sec] 警告：仍有遗漏的gid {sorted(list(missing_gids_final))}，将为每个遗漏的gid创建单独聚类", fg=typer.colors.YELLOW)
+            PrettyOutput.auto_print(
+                f"[jarvis-sec] 警告：仍有遗漏的gid {sorted(list(missing_gids_final))}，将为每个遗漏的gid创建单独聚类",
+            )
             supplemented_count = supplement_missing_gids(
                 missing_gids_final,
                 gid_to_item,
@@ -1043,9 +1158,11 @@ def process_cluster_chunk(
     else:
         # 聚类结果为空或None：为所有输入的gid创建单独聚类（保守策略）
         if pending_in_file_with_ids:
-            typer.secho(f"[jarvis-sec] 警告：聚类结果为空或None（文件={file}，批次={chunk_idx}），为所有gid创建单独聚类", fg=typer.colors.YELLOW)
+            PrettyOutput.auto_print(
+                f"[jarvis-sec] 警告：聚类结果为空或None（文件={file}，批次={chunk_idx}），为所有gid创建单独聚类",
+            )
             gid_to_item_fallback = build_gid_to_item_mapping(pending_in_file_with_ids)
             _merged_count = supplement_missing_gids(
                 input_gids,
                 gid_to_item_fallback,
@@ -1058,25 +1175,30 @@ def process_cluster_chunk(
         else:
             _merged_count = 0
             _invalid_count = 0
     # 标记聚类批次完成
-    _progress_append({
-        "event": "cluster_status",
-        "status": "done",
-        "file": file,
-        "batch_index": chunk_idx,
-        "clusters_count": _merged_count,
-        "invalid_clusters_count": _invalid_count,
-    })
+    _progress_append(
+        {
+            "event": "cluster_status",
+            "status": "done",
+            "file": file,
+            "batch_index": chunk_idx,
+            "clusters_count": _merged_count,
+            "invalid_clusters_count": _invalid_count,
+        }
+    )
     if _invalid_count > 0:
         try:
-            typer.secho(f"[jarvis-sec] 聚类批次完成: 有效聚类={_merged_count}，无效聚类={_invalid_count}（已跳过）", fg=typer.colors.GREEN)
+            PrettyOutput.auto_print(
+                f"[jarvis-sec] 聚类批次完成: 有效聚类={_merged_count}，无效聚类={_invalid_count}（已跳过）",
+            )
         except Exception:
             pass
     # 写入当前批次的聚类结果
     current_batch_records = [
-        rec for rec in cluster_records
+        rec
+        for rec in cluster_records
         if rec.get("file") == file and rec.get("batch_index") == chunk_idx
     ]
     if current_batch_records:
@@ -1116,7 +1238,7 @@ def process_file_clustering(
     pending_in_file = filter_pending_items(items, clustered_gids)
     if not pending_in_file:
         return
     # 优化：如果文件只有一个告警，跳过聚类，直接写入
     if len(pending_in_file) == 1:
         single_item = pending_in_file[0]
@@ -1131,11 +1253,15 @@ def process_file_clustering(
             _write_cluster_batch_snapshot,
         )
         return
     # 将该文件的告警按 cluster_limit 分批
-    _limit = cluster_limit if isinstance(cluster_limit, int) and cluster_limit > 0 else 50
-    _chunks: List[List[Dict]] = [pending_in_file[i:i + _limit] for i in range(0, len(pending_in_file), _limit)]
+    _limit = (
+        cluster_limit if isinstance(cluster_limit, int) and cluster_limit > 0 else 50
+    )
+    _chunks: List[List[Dict]] = [
+        pending_in_file[i : i + _limit] for i in range(0, len(pending_in_file), _limit)
+    ]
     # 处理每个批次
     for _chunk_idx, _chunk in enumerate(_chunks, start=1):
         process_cluster_chunk(
@@ -1163,30 +1289,42 @@ def initialize_clustering_context(
     compact_candidates: List[Dict],
     sec_dir: Path,
     _progress_append,
-) -> tuple[Dict[str, List[Dict]], Dict, tuple, List[List[Dict]], List[Dict], List[Dict], set]:
+) -> tuple[
+    Dict[str, List[Dict]], Dict, tuple, List[List[Dict]], List[Dict], List[Dict], set
+]:
     """初始化聚类上下文，返回(文件分组, 已有聚类, 快照写入函数, 聚类批次, 聚类记录, 无效聚类, 已聚类gid)"""
     # 按文件分组构建待聚类集合
     _file_groups = group_candidates_by_file(compact_candidates)
     cluster_batches: List[List[Dict]] = []
     cluster_records: List[Dict] = []
     invalid_clusters_for_review: List[Dict] = []
     # 读取已有聚类报告以支持断点
-    _existing_clusters, _completed_cluster_batches, _reviewed_invalid_gids = load_existing_clusters(
-        sec_dir
-    )
+    (
+        _existing_clusters,
+        _completed_cluster_batches,
+        _reviewed_invalid_gids,
+    ) = load_existing_clusters(sec_dir)
     # 创建快照写入函数
-    _write_cluster_batch_snapshot, _write_cluster_report_snapshot = create_cluster_snapshot_writer(
+    (
+        _write_cluster_batch_snapshot,
+        _write_cluster_report_snapshot,
+    ) = create_cluster_snapshot_writer(
         sec_dir, cluster_records, compact_candidates, _progress_append
     )
     # 从断点恢复聚类结果
-    cluster_batches, cluster_records, invalid_clusters_for_review, clustered_gids = restore_clusters_from_checkpoint(
+    (
+        cluster_batches,
+        cluster_records,
+        invalid_clusters_for_review,
+        clustered_gids,
+    ) = restore_clusters_from_checkpoint(
         _existing_clusters, _file_groups, _reviewed_invalid_gids
     )
     return (
         _file_groups,
         _existing_clusters,
@@ -1206,12 +1344,16 @@ def check_unclustered_gids(
     unclustered_gids = all_candidate_gids - clustered_gids
     if unclustered_gids:
         try:
-            typer.secho(f"[jarvis-sec] 发现 {len(unclustered_gids)} 个未聚类的 gid，将进行聚类", fg=typer.colors.YELLOW)
+            PrettyOutput.auto_print(
+                f"[jarvis-sec] 发现 {len(unclustered_gids)} 个未聚类的 gid，将进行聚类",
+            )
         except Exception:
             pass
     else:
         try:
-            typer.secho(f"[jarvis-sec] 所有 {len(all_candidate_gids)} 个候选已聚类，跳过聚类阶段", fg=typer.colors.GREEN)
+            PrettyOutput.auto_print(
+                f"[jarvis-sec] 所有 {len(all_candidate_gids)} 个候选已聚类，跳过聚类阶段",
+            )
         except Exception:
             pass
     return unclustered_gids
@@ -1239,16 +1381,18 @@ def execute_clustering_for_files(
         status_mgr.update_clustering(
             current_file=0,
             total_files=total_files_to_cluster,
-            message="开始聚类分析..."
+            message="开始聚类分析...",
         )
     for _file_idx, (_file, _items) in enumerate(file_groups.items(), start=1):
-        typer.secho(f"\n[jarvis-sec] 聚类文件 {_file_idx}/{total_files_to_cluster}: {_file}", fg=typer.colors.CYAN)
+        PrettyOutput.auto_print(
+            f"\n[jarvis-sec] 聚类文件 {_file_idx}/{total_files_to_cluster}: {_file}",
+        )
         # 更新当前文件进度
         status_mgr.update_clustering(
             current_file=_file_idx,
             total_files=total_files_to_cluster,
             file_name=_file,
-            message=f"正在聚类文件 {_file_idx}/{total_files_to_cluster}: {_file}"
+            message=f"正在聚类文件 {_file_idx}/{total_files_to_cluster}: {_file}",
         )
         # 使用子函数处理文件聚类
         process_file_clustering(
@@ -1277,13 +1421,15 @@ def record_clustering_completion(
     """记录聚类阶段完成"""
     try:
         _cluster_path = sec_dir / "cluster_report.jsonl"
-        _progress_append({
-            "event": "cluster_report_written",
-            "path": str(_cluster_path),
-            "clusters": len(cluster_records),
-            "total_candidates": len(compact_candidates),
-            "note": "每个批次已增量保存，无需重写整个文件",
-        })
+        _progress_append(
+            {
+                "event": "cluster_report_written",
+                "path": str(_cluster_path),
+                "clusters": len(cluster_records),
+                "total_candidates": len(compact_candidates),
+                "note": "每个批次已增量保存，无需重写整个文件",
+            }
+        )
     except Exception:
         pass
@@ -1294,7 +1440,7 @@ def fallback_to_file_based_batches(
 ) -> List[List[Dict]]:
     """若聚类失败或空，则回退为按文件一次处理"""
     fallback_batches: List[List[Dict]] = []
     # 收集所有未聚类的 gid（从所有候选 gid 中排除已聚类的）
     all_gids_in_file_groups = collect_candidate_gids(file_groups)
     gid_to_item_fallback: Dict[int, Dict] = {}
@@ -1306,7 +1452,7 @@ def fallback_to_file_based_batches(
                     gid_to_item_fallback[_gid] = c
             except Exception:
                 pass
     # 如果还有未聚类的 gid，按文件分组创建批次
     if all_gids_in_file_groups:
         # 收集已聚类的 gid（从 cluster_report.jsonl）
@@ -1323,23 +1469,24 @@ def fallback_to_file_based_batches(
                             clustered_gids_fallback.add(_gid_int)
                     except Exception:
                         pass
         unclustered_gids_fallback = all_gids_in_file_groups - clustered_gids_fallback
         if unclustered_gids_fallback:
             # 按文件分组未聚类的 gid
             from collections import defaultdict
             unclustered_by_file: Dict[str, List[Dict]] = defaultdict(list)
             for _gid in unclustered_gids_fallback:
                 item = gid_to_item_fallback.get(_gid)
                 if item:
                     file_key = str(item.get("file") or "")
                     unclustered_by_file[file_key].append(item)
             # 为每个文件创建批次
             for _file, _items in unclustered_by_file.items():
                 if _items:
                     fallback_batches.append(_items)
     return fallback_batches
@@ -1365,13 +1512,15 @@ def process_clustering_phase(
         invalid_clusters_for_review,
         clustered_gids,
     ) = initialize_clustering_context(compact_candidates, sec_dir, _progress_append)
     # 收集所有候选的 gid（用于检查未聚类的 gid）
     all_candidate_gids_in_clustering = collect_candidate_gids(_file_groups)
     # 检查是否有未聚类的 gid
-    unclustered_gids = check_unclustered_gids(all_candidate_gids_in_clustering, clustered_gids)
+    unclustered_gids = check_unclustered_gids(
+        all_candidate_gids_in_clustering, clustered_gids
+    )
     # 如果有未聚类的 gid，继续执行聚类
     if unclustered_gids:
         execute_clustering_for_files(
@@ -1389,12 +1538,15 @@ def process_clustering_phase(
             _write_cluster_batch_snapshot,
             force_save_memory=force_save_memory,
         )
     # 记录聚类阶段完成
-    record_clustering_completion(sec_dir, cluster_records, compact_candidates, _progress_append)
+    record_clustering_completion(
+        sec_dir, cluster_records, compact_candidates, _progress_append
+    )
     # 复核Agent：验证所有标记为无效的聚类（需要从review模块导入）
     from jarvis.jarvis_sec.review import process_review_phase
     cluster_batches = process_review_phase(
         invalid_clusters_for_review,
         entry_path,
@@ -1405,35 +1557,41 @@ def process_clustering_phase(
         cluster_batches,
         sec_dir,
     )
     # 若聚类失败或空，则回退为"按文件一次处理"
     if not cluster_batches:
-        fallback_batches = fallback_to_file_based_batches(_file_groups, _existing_clusters)
+        fallback_batches = fallback_to_file_based_batches(
+            _file_groups, _existing_clusters
+        )
         cluster_batches.extend(fallback_batches)
     # 完整性检查：确保所有候选的 gid 都已被聚类
     # 使用新的文件管理器进行校验
     is_complete, missing_gids_final = validate_clustering_completeness(sec_dir)
     if missing_gids_final:
         # 如果还有遗漏的gid，说明恢复逻辑有问题，需要重新聚类
         try:
             missing_count = len(missing_gids_final)
             if missing_count <= 20:
-                typer.secho(f"[jarvis-sec] 警告：发现 {missing_count} 个遗漏的gid（恢复逻辑可能有问题）: {sorted(list(missing_gids_final))}", fg=typer.colors.RED)
+                PrettyOutput.auto_print(
+                    f"[jarvis-sec] 警告：发现 {missing_count} 个遗漏的gid（恢复逻辑可能有问题）: {sorted(list(missing_gids_final))}",
+                )
             else:
                 missing_list = sorted(list(missing_gids_final))
                 display_list = missing_list[:10] + ["..."] + missing_list[-10:]
-                typer.secho(f"[jarvis-sec] 警告：发现 {missing_count} 个遗漏的gid（恢复逻辑可能有问题）: {display_list}", fg=typer.colors.RED)
+                PrettyOutput.auto_print(
+                    f"[jarvis-sec] 警告：发现 {missing_count} 个遗漏的gid（恢复逻辑可能有问题）: {display_list}",
+                )
         except Exception:
             pass
     # 清理之前创建的单独聚类（避免分析工作量激增）
     cluster_batches = filter_single_gid_clusters(
         cluster_batches,
         sec_dir,
         _progress_append,
     )
     return cluster_batches, invalid_clusters_for_review

jarvis-ai-assistant 0.7.16__py3-none-any.whl → 1.0.2__py3-none-any.whl

jarvis-ai-assistant 0.7.16py3-none-any.whl → 1.0.2py3-none-any.whl