jarvis-ai-assistant 0.1.107__py3-none-any.whl → 0.1.109__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of jarvis-ai-assistant might be problematic.
- jarvis/__init__.py +1 -1
- jarvis/agent.py +3 -3
- jarvis/jarvis_code_agent/code_agent.py +69 -217
- jarvis/jarvis_code_agent/file_select.py +11 -10
- jarvis/jarvis_code_agent/patch.py +19 -9
- jarvis/jarvis_code_agent/relevant_files.py +1 -162
- jarvis/jarvis_codebase/main.py +52 -57
- jarvis/jarvis_rag/main.py +193 -267
- jarvis/jarvis_tools/registry.py +8 -7
- jarvis/utils.py +151 -12
- {jarvis_ai_assistant-0.1.107.dist-info → jarvis_ai_assistant-0.1.109.dist-info}/METADATA +12 -3
- {jarvis_ai_assistant-0.1.107.dist-info → jarvis_ai_assistant-0.1.109.dist-info}/RECORD +16 -16
- {jarvis_ai_assistant-0.1.107.dist-info → jarvis_ai_assistant-0.1.109.dist-info}/LICENSE +0 -0
- {jarvis_ai_assistant-0.1.107.dist-info → jarvis_ai_assistant-0.1.109.dist-info}/WHEEL +0 -0
- {jarvis_ai_assistant-0.1.107.dist-info → jarvis_ai_assistant-0.1.109.dist-info}/entry_points.txt +0 -0
- {jarvis_ai_assistant-0.1.107.dist-info → jarvis_ai_assistant-0.1.109.dist-info}/top_level.txt +0 -0
jarvis/jarvis_rag/main.py
CHANGED
@@ -3,7 +3,7 @@ import numpy as np
 import faiss
 from typing import List, Tuple, Optional, Dict
 import pickle
-from jarvis.utils import OutputType, PrettyOutput,
+from jarvis.utils import OutputType, PrettyOutput, get_context_token_count, get_embedding, get_embedding_batch, get_file_md5, get_max_context_length, get_max_paragraph_length, get_min_paragraph_length, get_thread_count, init_gpu_config, load_embedding_model
 from jarvis.utils import init_env
 from dataclasses import dataclass
 from tqdm import tqdm
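
The removed in-file helpers (see the `_get_embedding`/`_get_embedding_batch` deletions further down) are replaced by shared utilities imported from `jarvis.utils`. The diff only shows the call sites, e.g. `get_embedding(self.embedding_model, query)` and `get_embedding_batch(self.embedding_model, texts)`, not the helper bodies. Below is a minimal sketch of what helpers with those signatures could look like, assuming a sentence-transformers style model object; it is illustrative, not the package's actual implementation.

```python
from typing import List

import numpy as np

def get_embedding(model, text: str) -> np.ndarray:
    # Encode a single text into a normalized float32 vector.
    vec = model.encode(text, normalize_embeddings=True, show_progress_bar=False)
    return np.asarray(vec, dtype=np.float32)

def get_embedding_batch(model, texts: List[str], batch_size: int = 32) -> np.ndarray:
    # Encode a list of texts in batches and return a 2-D float32 array.
    if not texts:
        return np.zeros((0, model.get_sentence_embedding_dimension()), dtype=np.float32)
    vecs = model.encode(texts, normalize_embeddings=True,
                        show_progress_bar=False, batch_size=batch_size)
    return np.asarray(vecs, dtype=np.float32)
```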
@@ -11,13 +11,9 @@ import fitz  # PyMuPDF for PDF files
 from docx import Document as DocxDocument  # python-docx for DOCX files
 from pathlib import Path
 from jarvis.jarvis_platform.registry import PlatformRegistry
-import shutil
-from datetime import datetime
 import lzma  # add lzma import
-from concurrent.futures import ThreadPoolExecutor
 from threading import Lock
-import
-import re
+import hashlib

 @dataclass
 class Document:
@@ -146,7 +142,7 @@ class RAGTool:
         # Initialize configuration
         self.min_paragraph_length = get_min_paragraph_length()  # Minimum paragraph length
         self.max_paragraph_length = get_max_paragraph_length()  # Maximum paragraph length
-        self.context_window =
+        self.context_window = 5  # Fixed context window size
         self.max_context_length = int(get_max_context_length() * 0.8)

         # Initialize data directory
@@ -163,15 +159,18 @@ class RAGTool:
             PrettyOutput.print(f"Failed to load model: {str(e)}", output_type=OutputType.ERROR)
             raise

-        #
-        self.
+        # Reworked cache-related initialization
+        self.cache_dir = os.path.join(self.data_dir, "cache")
+        if not os.path.exists(self.cache_dir):
+            os.makedirs(self.cache_dir)
+
         self.documents: List[Document] = []
-        self.index = None
-        self.flat_index = None
-        self.file_md5_cache = {}
+        self.index = None
+        self.flat_index = None
+        self.file_md5_cache = {}

-        #
-        self.
+        # Load the cache index
+        self._load_cache_index()

         # Register file processors
         self.file_processors = [
@@ -185,107 +184,99 @@ class RAGTool:
         self.vector_lock = Lock()  # Protect vector list concurrency

         # Initialize GPU memory configuration
-        self.gpu_config =
+        self.gpu_config = init_gpu_config()
+

-    def
-        """
+    def _get_cache_path(self, file_path: str) -> str:
+        """Get cache file path for a document

+        Args:
+            file_path: Original file path
+
         Returns:
-
+            str: Cache file path
         """
-
-
-
-
-
-
-
-
-            import torch
-            if torch.cuda.is_available():
-                # Get GPU information
-                gpu_mem = torch.cuda.get_device_properties(0).total_memory
-                config["has_gpu"] = True
-                config["device_memory"] = gpu_mem
-
-                # Estimate shared memory (usually a portion of system memory)
-                import psutil
-                system_memory = psutil.virtual_memory().total
-                config["shared_memory"] = min(system_memory * 0.5, gpu_mem * 2)  # the smaller of 50% of system memory or 2x the GPU memory
-
-                # Configure CUDA memory allocation
-                torch.cuda.set_per_process_memory_fraction(config["memory_fraction"])
-                torch.cuda.empty_cache()
-
-                PrettyOutput.print(
-                    f"GPU initialized: {torch.cuda.get_device_name(0)}\n"
-                    f"Device Memory: {gpu_mem / 1024**3:.1f}GB\n"
-                    f"Shared Memory: {config['shared_memory'] / 1024**3:.1f}GB",
-                    output_type=OutputType.SUCCESS
-                )
-            else:
-                PrettyOutput.print("No GPU available, using CPU mode", output_type=OutputType.WARNING)
-        except Exception as e:
-            PrettyOutput.print(f"GPU initialization failed: {str(e)}", output_type=OutputType.WARNING)
-
-        return config
-
-    def _load_cache(self):
-        """Load cache data"""
-        if os.path.exists(self.cache_path):
+        # Use the hash of the file path as the cache file name
+        file_hash = hashlib.md5(file_path.encode()).hexdigest()
+        return os.path.join(self.cache_dir, f"{file_hash}.cache")
+
+    def _load_cache_index(self):
+        """Load cache index"""
+        index_path = os.path.join(self.data_dir, "index.pkl")
+        if os.path.exists(index_path):
             try:
-                with lzma.open(
+                with lzma.open(index_path, 'rb') as f:
                     cache_data = pickle.load(f)
-                    self.
-                    vectors = cache_data["vectors"]
-                    self.file_md5_cache = cache_data.get("file_md5_cache", {})  # load the MD5 cache
+                    self.file_md5_cache = cache_data.get("file_md5_cache", {})

-                #
-
-                self.
+                # Load documents from the per-file cache files
+                for file_path in self.file_md5_cache:
+                    cache_path = self._get_cache_path(file_path)
+                    if os.path.exists(cache_path):
+                        try:
+                            with lzma.open(cache_path, 'rb') as f:
+                                file_cache = pickle.load(f)
+                                self.documents.extend(file_cache["documents"])
+                        except Exception as e:
+                            PrettyOutput.print(f"Failed to load cache for {file_path}: {str(e)}",
+                                            output_type=OutputType.WARNING)
+
+                # Rebuild the vector index
+                if self.documents:
+                    vectors = []
+                    for doc in self.documents:
+                        cache_path = self._get_cache_path(doc.metadata['file_path'])
+                        if os.path.exists(cache_path):
+                            with lzma.open(cache_path, 'rb') as f:
+                                file_cache = pickle.load(f)
+                                doc_idx = next((i for i, d in enumerate(file_cache["documents"])
+                                            if d.metadata['chunk_index'] == doc.metadata['chunk_index']), None)
+                                if doc_idx is not None:
+                                    vectors.append(file_cache["vectors"][doc_idx])
+
+                    if vectors:
+                        vectors = np.vstack(vectors)
+                        self._build_index(vectors)
+
                 PrettyOutput.print(f"Loaded {len(self.documents)} document fragments",
                                 output_type=OutputType.INFO)
+
             except Exception as e:
-                PrettyOutput.print(f"Failed to load cache: {str(e)}",
+                PrettyOutput.print(f"Failed to load cache index: {str(e)}",
                                 output_type=OutputType.WARNING)
                 self.documents = []
                 self.index = None
                 self.flat_index = None
                 self.file_md5_cache = {}

-    def _save_cache(self, vectors: np.ndarray):
-        """
+    def _save_cache(self, file_path: str, documents: List[Document], vectors: np.ndarray):
+        """Save cache for a single file
+
+        Args:
+            file_path: File path
+            documents: List of documents
+            vectors: Document vectors
+        """
         try:
+            # Save the per-file cache
+            cache_path = self._get_cache_path(file_path)
             cache_data = {
-                "
-                "
-                "documents": self.documents,
-                "vectors": vectors.copy() if vectors is not None else None,  # Create a copy of the array
-                "file_md5_cache": dict(self.file_md5_cache),  # Create a copy of the dictionary
-                "metadata": {
-                    "vector_dim": self.vector_dim,
-                    "total_docs": len(self.documents),
-                    "model_name": self.embedding_model.__class__.__name__
-                }
+                "documents": documents,
+                "vectors": vectors
             }
-
-
-
-
-
-
-
-
-
-
-
-
-            PrettyOutput.print(f"Cache saved: {len(self.documents)} document fragments",
-                            output_type=OutputType.INFO)
+            with lzma.open(cache_path, 'wb') as f:
+                pickle.dump(cache_data, f)
+
+            # Update and save the index
+            index_path = os.path.join(self.data_dir, "index.pkl")
+            index_data = {
+                "file_md5_cache": self.file_md5_cache
+            }
+            with lzma.open(index_path, 'wb') as f:
+                pickle.dump(index_data, f)
+
         except Exception as e:
-            PrettyOutput.print(f"Failed to save cache: {str(e)}",
-                            output_type=OutputType.ERROR)
-            raise
+            PrettyOutput.print(f"Failed to save cache: {str(e)}", output_type=OutputType.ERROR)

     def _build_index(self, vectors: np.ndarray):
         """Build FAISS index"""
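
The hunk above replaces the single monolithic cache with one compressed pickle per source file plus a small `index.pkl` that only stores the path-to-MD5 map. A standalone sketch of that layout is shown below; the helper names (`cache_path_for`, `write_file_cache`, `read_file_cache`) are illustrative and not part of the package.

```python
import hashlib
import lzma
import os
import pickle

import numpy as np

def cache_path_for(cache_dir: str, file_path: str) -> str:
    # Same naming scheme as _get_cache_path: MD5 of the source path -> "<hash>.cache".
    file_hash = hashlib.md5(file_path.encode()).hexdigest()
    return os.path.join(cache_dir, f"{file_hash}.cache")

def write_file_cache(cache_dir: str, file_path: str, documents: list, vectors: np.ndarray) -> None:
    # One lzma-compressed pickle per source file, mirroring _save_cache.
    os.makedirs(cache_dir, exist_ok=True)
    with lzma.open(cache_path_for(cache_dir, file_path), "wb") as f:
        pickle.dump({"documents": documents, "vectors": vectors}, f)

def read_file_cache(cache_dir: str, file_path: str):
    # Read the per-file cache back, returning (documents, vectors) or None if missing.
    path = cache_path_for(cache_dir, file_path)
    if not os.path.exists(path):
        return None
    with lzma.open(path, "rb") as f:
        data = pickle.load(f)
    return data["documents"], data["vectors"]
```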
@@ -364,106 +355,32 @@ class RAGTool:

         return paragraphs

-    def _get_embedding(self, text: str) -> np.ndarray:
-        """Get the vector representation of the text"""
-        embedding = self.embedding_model.encode(text,
-                                            normalize_embeddings=True,
-                                            show_progress_bar=False)
-        return np.array(embedding, dtype=np.float32)
-
-    def _get_embedding_batch(self, texts: List[str], batch_size: int = 32) -> np.ndarray:
-        """Get embeddings for a batch of texts efficiently"""
-        try:
-            if self.gpu_config["has_gpu"]:
-                import torch
-                torch.cuda.empty_cache()
-
-                # Use a smaller batch size
-                optimal_batch_size = min(16, len(texts))
-                all_embeddings = []
-
-                with tqdm(total=len(texts), desc="Vectorizing") as pbar:
-                    for i in range(0, len(texts), optimal_batch_size):
-                        try:
-                            batch = texts[i:i + optimal_batch_size]
-                            embeddings = self.embedding_model.encode(
-                                batch,
-                                normalize_embeddings=True,
-                                show_progress_bar=False,
-                                batch_size=4,  # reduce the internal batch size
-                                convert_to_tensor=True
-                            )
-                            # Move to the CPU immediately
-                            embeddings = embeddings.cpu().numpy()
-                            all_embeddings.append(embeddings)
-                            pbar.update(len(batch))
-
-                            # Clear the GPU cache
-                            torch.cuda.empty_cache()
-
-                        except RuntimeError as e:
-                            if "out of memory" in str(e):
-                                # If out of memory, retry with a smaller batch size
-                                if optimal_batch_size > 4:
-                                    optimal_batch_size //= 2
-                                    PrettyOutput.print(
-                                        f"CUDA out of memory, reducing batch size to {optimal_batch_size}",
-                                        OutputType.WARNING
-                                    )
-                                    i -= optimal_batch_size  # retry the current batch
-                                    continue
-                            raise
-
-                return np.vstack(all_embeddings)
-            else:
-                # CPU mode
-                return self.embedding_model.encode(
-                    texts,
-                    normalize_embeddings=True,
-                    show_progress_bar=True,
-                    batch_size=8,
-                    convert_to_tensor=False
-                )
-
-        except Exception as e:
-            PrettyOutput.print(f"Batch embedding failed: {str(e)}", OutputType.ERROR)
-            return np.zeros((len(texts), self.vector_dim), dtype=np.float32)  # type: ignore

     def _process_document_batch(self, documents: List[Document]) -> np.ndarray:
-        """Process a batch of documents using shared memory
-
-        Args:
-            documents: List of documents to process
-
-        Returns:
-            np.ndarray: Document vectors
-        """
+        """Process a batch of documents using shared memory"""
         try:
-            import torch
-
-            # Estimate memory requirements
-            total_content_size = sum(len(doc.content) for doc in documents)
-            est_memory_needed = total_content_size * 4  # rough estimate
-
-            # If the estimated memory exceeds the shared memory limit, process in batches
-            if est_memory_needed > self.gpu_config["shared_memory"] * 0.7:
-                batch_size = max(1, int(len(documents) * (self.gpu_config["shared_memory"] * 0.7 / est_memory_needed)))
-
-                all_vectors = []
-                for i in range(0, len(documents), batch_size):
-                    batch = documents[i:i + batch_size]
-                    vectors = self._process_document_batch(batch)
-                    all_vectors.append(vectors)
-                return np.vstack(all_vectors)
-
-            # Process a single batch normally
             texts = []
+            self.documents = []  # Reset documents to store chunks
+
             for doc in documents:
-
-
-
-
+                # Split original document into chunks
+                chunks = self._split_text(doc.content)
+                for chunk_idx, chunk in enumerate(chunks):
+                    # Create new Document for each chunk
+                    new_metadata = doc.metadata.copy()
+                    new_metadata.update({
+                        'chunk_index': chunk_idx,
+                        'total_chunks': len(chunks),
+                        'original_length': len(doc.content)
+                    })
+                    self.documents.append(Document(
+                        content=chunk,
+                        metadata=new_metadata,
+                        md5=doc.md5
+                    ))
+                    texts.append(f"File:{doc.metadata['file_path']} Chunk:{chunk_idx} Content:{chunk}")

+            return get_embedding_batch(self.embedding_model, texts)
         except Exception as e:
             PrettyOutput.print(f"Batch processing failed: {str(e)}", OutputType.ERROR)
             return np.zeros((0, self.vector_dim), dtype=np.float32)  # type: ignore
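
`_process_document_batch` now splits each source document into chunks and records `chunk_index`, `total_chunks`, and `original_length` on every chunk, which is what later lets `search()` pull sibling chunks from the same file and deduplicate by `(file_path, chunk_index)`. A self-contained sketch of that bookkeeping follows; `Chunk`, `split_text`, and `chunk_document` are illustrative stand-ins (the real `_split_text` is not shown in this diff).

```python
from dataclasses import dataclass
from typing import Dict, List

@dataclass
class Chunk:
    content: str
    metadata: Dict

def split_text(text: str, max_len: int = 512) -> List[str]:
    # Stand-in for RAGTool._split_text: naive fixed-size splitting.
    return [text[i:i + max_len] for i in range(0, len(text), max_len)] or [""]

def chunk_document(file_path: str, text: str) -> List[Chunk]:
    # Attach the same per-chunk metadata the new code records on each Document.
    chunks = split_text(text)
    return [
        Chunk(content=chunk, metadata={
            "file_path": file_path,
            "chunk_index": chunk_idx,
            "total_chunks": len(chunks),
            "original_length": len(text),
        })
        for chunk_idx, chunk in enumerate(chunks)
    ]
```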
@@ -572,74 +489,64 @@ class RAGTool:
         unchanged_documents = [doc for doc in self.documents
                              if doc.metadata['file_path'] in unchanged_files]

-        # Process files
+        # Process files one by one with optimized vectorization
         if files_to_process:
             PrettyOutput.print(f"Processing {len(files_to_process)} files...", OutputType.INFO)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                combined_text = f"File:{doc.metadata['file_path']} Content:{doc.content}"
-                texts_to_vectorize.append(combined_text)
-
-            # Use a smaller initial batch size
-            initial_batch_size = min(
-                32,  # maximum batch size
-                max(4, len(texts_to_vectorize) // 8),  # batch size based on the number of documents
-                len(texts_to_vectorize)  # no more than the total number of documents
-            )
-
-            # Vectorize in batches
-            vectors = self._get_embedding_batch(texts_to_vectorize, initial_batch_size)
+            new_documents = []
+            new_vectors = []
+
+            with tqdm(total=len(files_to_process), desc="Processing files") as pbar:
+                for file_path in files_to_process:
+                    try:
+                        # Process single file
+                        file_docs = self._process_file(file_path)
+                        if file_docs:
+                            # Vectorize documents from this file
+                            texts_to_vectorize = [
+                                f"File:{doc.metadata['file_path']} Content:{doc.content}"
+                                for doc in file_docs
+                            ]
+                            file_vectors = get_embedding_batch(self.embedding_model, texts_to_vectorize)
+
+                            # Save cache for this file
+                            self._save_cache(file_path, file_docs, file_vectors)
+
+                            # Accumulate documents and vectors
+                            new_documents.extend(file_docs)
+                            new_vectors.append(file_vectors)
+
+                    except Exception as e:
+                        PrettyOutput.print(f"Failed to process {file_path}: {str(e)}", OutputType.ERROR)
+
+                    pbar.update(1)

-
-
+            # Update documents list
+            self.documents.extend(new_documents)

-
+            # Build final index
+            if new_vectors:
+                all_new_vectors = np.vstack(new_vectors)
+
                 if self.flat_index is not None:
-                    #
+                    # Get vectors for unchanged documents
                     unchanged_vectors = self._get_unchanged_vectors(unchanged_documents)
                     if unchanged_vectors is not None:
-                        final_vectors = np.vstack([unchanged_vectors,
+                        final_vectors = np.vstack([unchanged_vectors, all_new_vectors])
                     else:
-                        final_vectors =
+                        final_vectors = all_new_vectors
                 else:
-                    final_vectors =
+                    final_vectors = all_new_vectors

-                #
+                # Build index
                 self._build_index(final_vectors)
-                self._save_cache(final_vectors)

-
-
-
-
-
-
+                PrettyOutput.print(
+                    f"Indexed {len(self.documents)} documents "
+                    f"(New/Modified: {len(new_documents)}, "
+                    f"Unchanged: {len(unchanged_documents)})",
+                    OutputType.SUCCESS
+                )

     def _get_unchanged_vectors(self, unchanged_documents: List[Document]) -> Optional[np.ndarray]:
         """Get vectors for unchanged documents from existing index"""
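
The new `build_index` path processes changed files one at a time, caches each file's chunks and vectors via `_save_cache`, and stacks the per-file vectors with the vectors of unchanged documents. The hunks shown here do not include how `files_to_process` and `unchanged_files` are computed, but given the `get_file_md5` import and the `file_md5_cache` map, a plausible sketch of that split is shown below; `file_md5` and `partition_files` are hypothetical helper names.

```python
import hashlib
from typing import Dict, List, Tuple

def file_md5(path: str) -> str:
    # Hash the file contents in blocks (stand-in for jarvis.utils.get_file_md5).
    h = hashlib.md5()
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(1 << 20), b""):
            h.update(block)
    return h.hexdigest()

def partition_files(paths: List[str], md5_cache: Dict[str, str]) -> Tuple[List[str], List[str]]:
    # Compare each file's current MD5 with the cached one to decide what to reprocess.
    unchanged, to_process = [], []
    for path in paths:
        if md5_cache.get(path) == file_md5(path):
            unchanged.append(path)
        else:
            to_process.append(path)
    return unchanged, to_process
```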
@@ -663,40 +570,62 @@ class RAGTool:
         return None

     def search(self, query: str, top_k: int = 30) -> List[Tuple[Document, float]]:
-        """Search documents
-
-        Args:
-            query: Search query
-            top_k: Number of results to return
-        """
+        """Search documents with context window"""
         if not self.index:
             PrettyOutput.print("Index not built, building...", output_type=OutputType.INFO)
             self.build_index(self.root_dir)

         # Get query vector
-        query_vector = self.
+        query_vector = get_embedding(self.embedding_model, query)
         query_vector = query_vector.reshape(1, -1)

         # Search with more candidates
         initial_k = min(top_k * 4, len(self.documents))
         distances, indices = self.index.search(query_vector, initial_k)  # type: ignore

-        # Process results
+        # Process results with context window
         results = []
         seen_files = set()
+
         for idx, dist in zip(indices[0], distances[0]):
             if idx != -1:
                 doc = self.documents[idx]
                 similarity = 1.0 / (1.0 + float(dist))
-                if similarity > 0.3:
+                if similarity > 0.3:
                     file_path = doc.metadata['file_path']
                     if file_path not in seen_files:
                         seen_files.add(file_path)
-
-
+
+                        # Get full context from original document
+                        original_doc = next((d for d in self.documents
+                                        if d.metadata['file_path'] == file_path), None)
+                        if original_doc:
+                            window_docs = []  # Add this line to initialize the list
+                            full_content = original_doc.content
+                            # Find all chunks from this file
+                            file_chunks = [d for d in self.documents
+                                        if d.metadata['file_path'] == file_path]
+                            # Add all related chunks
+                            for chunk_doc in file_chunks:
+                                window_docs.append((chunk_doc, similarity * 0.9))
+
+                            results.extend(window_docs)
+                        if len(results) >= top_k * (2 * self.context_window + 1):
                             break

-
+        # Sort by similarity and deduplicate
+        results.sort(key=lambda x: x[1], reverse=True)
+        seen = set()
+        final_results = []
+        for doc, score in results:
+            key = (doc.metadata['file_path'], doc.metadata['chunk_index'])
+            if key not in seen:
+                seen.add(key)
+                final_results.append((doc, score))
+                if len(final_results) >= top_k:
+                    break
+
+        return final_results

     def query(self, query: str) -> List[Document]:
         """Query related documents
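
The rewritten `search()` first over-collects candidates (every chunk of any file that produced a strong hit, slightly down-weighted by `similarity * 0.9`), then sorts by score and deduplicates on `(file_path, chunk_index)` until `top_k` results remain. The final loop is easy to isolate; here is a small sketch using plain metadata dicts instead of `Document` objects:

```python
from typing import Dict, List, Tuple

def dedup_results(results: List[Tuple[Dict, float]], top_k: int) -> List[Tuple[Dict, float]]:
    # Sort candidate (metadata, score) pairs by score, keep the best hit per chunk.
    ordered = sorted(results, key=lambda x: x[1], reverse=True)
    seen = set()
    final = []
    for meta, score in ordered:
        key = (meta["file_path"], meta["chunk_index"])
        if key in seen:
            continue
        seen.add(key)
        final.append((meta, score))
        if len(final) >= top_k:
            break
    return final
```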
@@ -718,13 +647,6 @@ class RAGTool:
         if not results:
             return None

-        # Display the found documents
-        for doc, score in results:
-            output = f"""File: {doc.metadata['file_path']} (Score: {score:.3f})\n"""
-            output += f"""Fragment {doc.metadata['chunk_index'] + 1}/{doc.metadata['total_chunks']}\n"""
-            output += f"""Content:\n{doc.content}\n"""
-            PrettyOutput.print(output, output_type=OutputType.INFO, lang="markdown")
-
         # Build the prompt
         prompt = f"""Based on the following document fragments, please answer the user's question accurately and comprehensively.

@@ -733,8 +655,8 @@ Question: {question}
 Relevant documents (ordered by relevance):
 """
         # Add context while controlling the total length
-
-
+        available_count = self.max_context_length - get_context_token_count(prompt) - 1000
+        current_count = 0

         for doc, score in results:
             doc_content = f"""
@@ -742,7 +664,11 @@ Relevant documents (ordered by relevance):
 {doc.content}
 ---
 """
-
+            prompt += "Answer Format:\n"
+            prompt += "1. Answer the question accurately and comprehensively.\n"
+            prompt += "2. If the documents don't fully answer the question, please indicate what information is missing.\n"
+            prompt += "3. Reference the documents in the answer.\n"
+            if current_count + get_context_token_count(doc_content) > available_count:
                 PrettyOutput.print(
                     "Due to context length limit, some fragments were omitted",
                     output_type=OutputType.WARNING
@@ -750,7 +676,7 @@ Relevant documents (ordered by relevance):
                 break

             prompt += doc_content
-
+            current_count += get_context_token_count(doc_content)

         prompt += "\nIf the documents don't fully answer the question, please indicate what information is missing."

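
The last three hunks replace character-based length checks with `get_context_token_count` and budget the prompt explicitly: the available budget is the maximum context length minus the header's token count minus a 1000-token reserve, and fragments are appended until that budget is spent. Below is a minimal sketch of the loop, with `count_tokens` standing in for `get_context_token_count` (whose implementation is not shown in this diff):

```python
from typing import Callable, List

def assemble_prompt(header: str, fragments: List[str], max_tokens: int,
                    count_tokens: Callable[[str], int], reserve: int = 1000) -> str:
    # Append fragments until the token budget (minus a reserve for the answer) is spent.
    prompt = header
    budget = max_tokens - count_tokens(header) - reserve
    used = 0
    for fragment in fragments:
        cost = count_tokens(fragment)
        if used + cost > budget:
            break  # remaining fragments are omitted, as in the RAGTool loop
        prompt += fragment
        used += cost
    return prompt
```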
jarvis/jarvis_tools/registry.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Any, Callable, Dict, List, Optional

 from jarvis.jarvis_platform.registry import PlatformRegistry
 from jarvis.jarvis_tools.base import Tool
-from jarvis.utils import OutputType, PrettyOutput, get_max_context_length
+from jarvis.utils import OutputType, PrettyOutput, get_context_token_count, get_max_context_length


 tool_call_help = """## Tool Usage Format
@@ -137,7 +137,8 @@ class ToolRegistry:
                     hasattr(item, 'name') and
                     hasattr(item, 'description') and
                     hasattr(item, 'parameters') and
-                    hasattr(item, 'execute')
+                    hasattr(item, 'execute') and
+                    item.name == module_name):

                     if hasattr(item, "check"):
                         if not item.check():
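
This hunk tightens tool discovery: a class found in a module is only registered when, in addition to exposing `name`, `description`, `parameters`, and `execute`, its declared `name` matches the module it was loaded from. A small illustration of the check in isolation; `ReadFileTool` is a made-up example class, not one shipped by the package:

```python
def is_registrable_tool(item, module_name: str) -> bool:
    # The tightened guard from the diff: the class must also be named after its module.
    return (hasattr(item, "name") and hasattr(item, "description")
            and hasattr(item, "parameters") and hasattr(item, "execute")
            and item.name == module_name)

class ReadFileTool:
    name = "read_file"
    description = "Read a file from disk"
    parameters = {}

    def execute(self, args):
        return None

assert is_registrable_tool(ReadFileTool, "read_file")
assert not is_registrable_tool(ReadFileTool, "write_file")
```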
@@ -247,16 +248,16 @@ arguments:
             PrettyOutput.section("Execution successful", OutputType.SUCCESS)

             # If the output exceeds 4k characters, use a large model to summarize
-            if
+            if get_context_token_count(output) > self.max_context_length:
                 try:
                     PrettyOutput.print("Output is too long, summarizing...", OutputType.PROGRESS)
                     model = PlatformRegistry.get_global_platform_registry().get_normal_platform()

                     # If the output exceeds the maximum context length, only take the last part
-
-                    if
-                        output_to_summarize = output[-
-                        truncation_notice = f"\n(Note: Due to the length of the output, only the last {
+                    max_count = self.max_context_length
+                    if get_context_token_count(output) > max_count:
+                        output_to_summarize = output[-max_count:]
+                        truncation_notice = f"\n(Note: Due to the length of the output, only the last {max_count} characters are summarized)"
                     else:
                         output_to_summarize = output
                         truncation_notice = ""