agentscope-runtime 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Files changed (21)
  1. agentscope_runtime/engine/services/context_manager.py +28 -1
  2. agentscope_runtime/engine/services/rag_service.py +101 -0
  3. agentscope_runtime/sandbox/box/training_box/env_service.py +1 -1
  4. agentscope_runtime/sandbox/box/training_box/environments/bfcl/bfcl_dataprocess.py +216 -0
  5. agentscope_runtime/sandbox/box/training_box/environments/bfcl/bfcl_env.py +380 -0
  6. agentscope_runtime/sandbox/box/training_box/environments/bfcl/env_handler.py +934 -0
  7. agentscope_runtime/sandbox/box/training_box/training_box.py +139 -9
  8. agentscope_runtime/sandbox/enums.py +2 -0
  9. agentscope_runtime/sandbox/manager/container_clients/docker_client.py +19 -9
  10. agentscope_runtime/sandbox/manager/container_clients/kubernetes_client.py +61 -6
  11. agentscope_runtime/sandbox/manager/sandbox_manager.py +95 -35
  12. agentscope_runtime/sandbox/manager/server/app.py +41 -4
  13. agentscope_runtime/sandbox/model/__init__.py +1 -5
  14. agentscope_runtime/sandbox/model/manager_config.py +2 -13
  15. agentscope_runtime/version.py +1 -1
  16. {agentscope_runtime-0.1.1.dist-info → agentscope_runtime-0.1.2.dist-info}/METADATA +6 -1
  17. {agentscope_runtime-0.1.1.dist-info → agentscope_runtime-0.1.2.dist-info}/RECORD +21 -17
  18. {agentscope_runtime-0.1.1.dist-info → agentscope_runtime-0.1.2.dist-info}/WHEEL +0 -0
  19. {agentscope_runtime-0.1.1.dist-info → agentscope_runtime-0.1.2.dist-info}/entry_points.txt +0 -0
  20. {agentscope_runtime-0.1.1.dist-info → agentscope_runtime-0.1.2.dist-info}/licenses/LICENSE +0 -0
  21. {agentscope_runtime-0.1.1.dist-info → agentscope_runtime-0.1.2.dist-info}/top_level.txt +0 -0
agentscope_runtime/engine/services/context_manager.py

@@ -4,12 +4,19 @@ from typing import List
 
 from .manager import ServiceManager
 from .memory_service import MemoryService, InMemoryMemoryService
+from .rag_service import RAGService
 from .session_history_service import (
     SessionHistoryService,
     Session,
     InMemorySessionHistoryService,
 )
-from ..schemas.agent_schemas import Message
+from ..schemas.agent_schemas import (
+    Message,
+    MessageType,
+    Role,
+    TextContent,
+    ContentType,
+)
 
 
 class ContextComposer:
@@ -19,6 +26,7 @@ class ContextComposer:
         session: Session,  # session
         memory_service: MemoryService = None,
         session_history_service: SessionHistoryService = None,
+        rag_service: RAGService = None,
     ):
         # session
         if session_history_service:
@@ -42,6 +50,18 @@ class ContextComposer:
             )
            session.messages = memories + session.messages
 
+        # rag
+        if rag_service:
+            query = await rag_service.get_query_text(request_input[-1])
+            docs = await rag_service.retrieve(query=query, k=5)
+            cooked_doc = "\n".join(docs)
+            message = Message(
+                type=MessageType.MESSAGE,
+                role=Role.SYSTEM,
+                content=[TextContent(type=ContentType.TEXT, text=cooked_doc)],
+            )
+            session.messages.append(message)
+
 
 class ContextManager(ServiceManager):
     """
@@ -53,10 +73,12 @@ class ContextManager(ServiceManager):
         context_composer_cls=ContextComposer,
         session_history_service: SessionHistoryService = None,
         memory_service: MemoryService = None,
+        rag_service: RAGService = None,
     ):
         self._context_composer_cls = context_composer_cls
         self._session_history_service = session_history_service
         self._memory_service = memory_service
+        self._rag_service = rag_service
         super().__init__()
 
     def _register_default_services(self):
@@ -68,6 +90,8 @@ class ContextManager(ServiceManager):
 
         self.register_service("session", self._session_history_service)
         self.register_service("memory", self._memory_service)
+        if self._rag_service:
+            self.register_service("rag", self._rag_service)
 
     async def compose_context(
         self,
@@ -77,6 +101,7 @@ class ContextManager(ServiceManager):
         await self._context_composer_cls.compose(
             memory_service=self._memory_service,
             session_history_service=self._session_history_service,
+            rag_service=self._rag_service,
             session=session,
             request_input=request_input,
         )
@@ -119,10 +144,12 @@
 async def create_context_manager(
     memory_service: MemoryService = None,
     session_history_service: SessionHistoryService = None,
+    rag_service: RAGService = None,
 ):
     manager = ContextManager(
         memory_service=memory_service,
         session_history_service=session_history_service,
+        rag_service=rag_service,
     )
 
     async with manager:
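
A usage sketch (editor's illustration, not from the diff) of the new rag_service wiring. It assumes create_context_manager is an async context manager that yields the configured ContextManager, and that DASHSCOPE_API_KEY is set for DashScopeEmbeddings:

import asyncio

from agentscope_runtime.engine.services.context_manager import (
    create_context_manager,
)
from agentscope_runtime.engine.services.rag_service import LangChainRAGService


async def main():
    # Assumption: "milvus_demo.db" is an existing Milvus Lite database.
    rag = LangChainRAGService(uri="milvus_demo.db")
    async with create_context_manager(rag_service=rag) as manager:
        # compose_context(session, request_input) will now retrieve the
        # top-5 passages for the latest input message and append them to
        # session.messages as a SYSTEM message (k=5 is fixed in
        # ContextComposer.compose).
        ...


asyncio.run(main())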
agentscope_runtime/engine/services/rag_service.py (new file)

@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+from typing import Optional
+
+from .base import ServiceWithLifecycleManager
+from ..schemas.agent_schemas import Message, MessageType
+
+
+class RAGService(ServiceWithLifecycleManager):
+    """
+    RAG Service
+    """
+
+    async def get_query_text(self, message: Message) -> str:
+        """
+        Gets the query text from a message.
+
+        Args:
+            message: The message to extract the query text from.
+
+        Returns:
+            The query text, or an empty string if none is found.
+        """
+        if message:
+            if message.type == MessageType.MESSAGE:
+                for content in message.content:
+                    if content.type == "text":
+                        return content.text
+        return ""
+
+    async def retrieve(self, query: str, k: int = 1) -> list[str]:
+        raise NotImplementedError
+
+
+DEFAULT_URI = "milvus_demo.db"
+
+
+class LangChainRAGService(RAGService):
+    """
+    RAG Service using LangChain
+    """
+
+    def __init__(
+        self,
+        uri: Optional[str] = None,
+        docs: Optional[list[str]] = None,
+    ):
+        from langchain_community.embeddings import DashScopeEmbeddings
+        from langchain_milvus import Milvus
+
+        self.Milvus = Milvus
+        self.embeddings = DashScopeEmbeddings()
+        self.vectorstore = None
+
+        if uri:
+            self.uri = uri
+            self.from_db()
+        elif docs:
+            self.uri = DEFAULT_URI
+            self.from_docs(docs)
+        else:
+            docs = []
+            self.uri = DEFAULT_URI
+            self.from_docs(docs)
+
+    def from_docs(self, docs=None):
+        if docs is None:
+            docs = []
+
+        self.vectorstore = self.Milvus.from_documents(
+            documents=docs,
+            embedding=self.embeddings,
+            connection_args={
+                "uri": self.uri,
+            },
+            drop_old=False,
+        )
+
+    def from_db(self):
+        self.vectorstore = self.Milvus(
+            embedding_function=self.embeddings,
+            connection_args={"uri": self.uri},
+            index_params={"index_type": "FLAT", "metric_type": "L2"},
+        )
+
+    async def retrieve(self, query: str, k: int = 1) -> list[str]:
+        if self.vectorstore is None:
+            raise ValueError(
+                "Vector store not initialized. Call build_index first.",
+            )
+        docs = self.vectorstore.similarity_search(query, k=k)
+        return [doc.page_content for doc in docs]
+
+    async def start(self) -> None:
+        """Starts the service."""
+
+    async def stop(self) -> None:
+        """Stops the service."""
+
+    async def health(self) -> bool:
+        """Checks the health of the service."""
+        return True
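
Since RAGService.retrieve raises NotImplementedError, a concrete backend only has to supply retrieve plus the lifecycle hooks. A minimal in-memory sketch (hypothetical, not shipped in the wheel), using only the interfaces shown above:

from agentscope_runtime.engine.services.rag_service import RAGService


class KeywordRAGService(RAGService):
    """Toy retriever that ranks documents by keyword overlap."""

    def __init__(self, docs: list[str]):
        self.docs = docs

    async def retrieve(self, query: str, k: int = 1) -> list[str]:
        terms = set(query.lower().split())
        # Score each document by how many query terms it contains.
        scored = sorted(
            self.docs,
            key=lambda d: len(terms & set(d.lower().split())),
            reverse=True,
        )
        return scored[:k]

    async def start(self) -> None:
        """Starts the service."""

    async def stop(self) -> None:
        """Stops the service."""

    async def health(self) -> bool:
        """Checks the health of the service."""
        return True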
agentscope_runtime/sandbox/box/training_box/env_service.py

@@ -749,4 +749,4 @@ if __name__ == "__main__":
         sys.exit(1)
 
     print(f"Starting server on {args.portal}:{args.port}")
-    uvicorn.run(app, host=args.portal, port=args.port)
+    uvicorn.run(app, host=args.portal, port=args.port, log_level="error")
agentscope_runtime/sandbox/box/training_box/environments/bfcl/bfcl_dataprocess.py (new file)

@@ -0,0 +1,216 @@
+# -*- coding: utf-8 -*-
+"""
+BFCL data preprocessing script - data processing and splitting tool.
+
+What this script does:
+1. Load the test cases for the specified test categories
+2. Preprocess the test cases and populate the tool-set schemas
+3. Split the dataset into training and test sets at a given ratio
+4. Save the data file and the split-ID file separately
+
+Usage example:
+    result = bfcl_task_preprocess(
+        test_categories=["multi_turn_base"],  # test categories to process
+        train_ratio=0.5,                      # 50% goes to the training set
+        output_dir="/path/to/output",         # output directory
+    )
+
+Two files are generated:
+    {category}_processed.jsonl: the processed dataset
+    {category}_split_ids.json: the train/test set IDs
+"""
+
+from typing import List, Dict, Any, Optional
+import json
+import random
+from pathlib import Path
+
+from bfcl_eval.constants.eval_config import (
+    PROMPT_PATH,
+)
+from bfcl_eval.eval_checker.eval_runner_helper import load_file
+from bfcl_eval.utils import (
+    parse_test_category_argument,
+    populate_test_cases_with_predefined_functions,
+)
+
+
+TEST_FILE_MAPPING = {
+    "simple": "BFCL_v4_simple.json",
+    "irrelevance": "BFCL_v4_irrelevance.json",
+    "parallel": "BFCL_v4_parallel.json",
+    "multiple": "BFCL_v4_multiple.json",
+    "parallel_multiple": "BFCL_v4_parallel_multiple.json",
+    "java": "BFCL_v4_java.json",
+    "javascript": "BFCL_v4_javascript.json",
+    "live_simple": "BFCL_v4_live_simple.json",
+    "live_multiple": "BFCL_v4_live_multiple.json",
+    "live_parallel": "BFCL_v4_live_parallel.json",
+    "live_parallel_multiple": "BFCL_v4_live_parallel_multiple.json",
+    "live_irrelevance": "BFCL_v4_live_irrelevance.json",
+    "live_relevance": "BFCL_v4_live_relevance.json",
+    "multi_turn_base": "BFCL_v4_multi_turn_base.json",
+    "multi_turn_miss_func": "BFCL_v4_multi_turn_miss_func.json",
+    "multi_turn_miss_param": "BFCL_v4_multi_turn_miss_param.json",
+    "multi_turn_long_context": "BFCL_v4_multi_turn_long_context.json",
+}
+
+
+def bfcl_task_preprocess(
+    test_categories: Optional[List[str]] = None,
+    train_ratio: float = 0.5,
+    random_seed: int = 42,
+    output_dir: str = "",
+    enable_shuffle: bool = False,
+) -> Dict[str, List[Dict[str, Any]]]:
+    """
+    Preprocess training dataset by loading test cases, processing them and
+    splitting into train/test sets.
+
+    Args:
+        test_categories: List of test categories to process. Can be specific
+            category names or collection names
+            (e.g. 'all', 'multi_turn'). If None, process all categories.
+        train_ratio: Ratio for training set split, range [0, 1]. If 1.0, no
+            split is performed.
+        random_seed: Random seed for reproducible data splitting.
+        output_dir: Output directory path. If empty, no files are written.
+        enable_shuffle: Whether to shuffle the processed cases before
+            splitting.
+
+    Returns:
+        Dict containing train and test sets: {'train': [...], 'test': [...]}
+    """
+
+    def load_selected_test_cases(categories: List[str]):
+        all_test_entries_by_category = {}
+
+        try:
+            test_categories_resolved = parse_test_category_argument(categories)
+        except Exception as e:
+            print(f"Error: Invalid test categories - {str(e)}")
+            return {}
+
+        print(f"Selected test categories: {test_categories_resolved}")
+
+        for category in test_categories_resolved:
+            if category in TEST_FILE_MAPPING:
+                test_file_path = TEST_FILE_MAPPING[category]
+                test_entries = load_file(PROMPT_PATH / test_file_path)
+                print(f"Loaded {len(test_entries)} test cases from {category}")
+                if category not in all_test_entries_by_category:
+                    all_test_entries_by_category[category] = []
+                all_test_entries_by_category[category].extend(test_entries)
+
+        return all_test_entries_by_category
+
+    random.seed(random_seed)
+
+    if test_categories is None:
+        test_categories = ["all"]
+
+    all_test_cases_by_category = load_selected_test_cases(test_categories)
+
+    if not all_test_cases_by_category:
+        print("Warning: No test cases found")
+        return {"train": [], "test": []}
+
+    total_cases = sum(
+        len(cases) for cases in all_test_cases_by_category.values()
+    )
+    print(
+        f"Loaded {total_cases} test cases in total across \
+        {len(all_test_cases_by_category)} categories",
+    )
+
+    all_processed_cases = []
+    processed_cases_by_category = {}
+
+    for category, test_cases in all_test_cases_by_category.items():
+        print(f"Processing category: {category}")
+
+        category_processed_cases = (
+            populate_test_cases_with_predefined_functions(test_cases)
+        )
+        processed_cases_by_category[category] = category_processed_cases
+        all_processed_cases.extend(category_processed_cases)
+        print(
+            f"Successfully processed {len(category_processed_cases)} test \
+            cases for {category}",
+        )
+
+    print(
+        f"Successfully processed {len(all_processed_cases)} test \
+        cases in total",
+    )
+
+    if enable_shuffle:
+        random.shuffle(all_processed_cases)
+    train_size = int(len(all_processed_cases) * train_ratio)
+    train_cases = all_processed_cases[:train_size]
+    test_cases = all_processed_cases[train_size:]
+    print(
+        f"Data split complete: {len(train_cases)} training, \
+        {len(test_cases)} test cases",
+    )
+
+    case_result = {"train": train_cases, "test": test_cases}
+
+    if output_dir:
+        output_path = Path(output_dir)
+        output_path.mkdir(parents=True, exist_ok=True)
+        test_categories_str = "_".join(test_categories)
+
+        full_jsonl_path = (
+            output_path / f"{test_categories_str}_processed.jsonl"
+        )
+        with open(full_jsonl_path, "w", encoding="utf-8") as f:
+            for case in all_processed_cases:
+                f.write(json.dumps(case, ensure_ascii=False) + "\n")
+        print(f"Full dataset saved to: {full_jsonl_path}")
+
+        split_ids = {
+            "train": [
+                case.get("id", idx) for idx, case in enumerate(train_cases)
+            ],
+            "val": [
+                case.get("id", idx) for idx, case in enumerate(test_cases)
+            ],
+        }
+
+        split_ids_path = output_path / f"{test_categories_str}_split_ids.json"
+        with open(split_ids_path, "w", encoding="utf-8") as f:
+            json.dump(split_ids, f, ensure_ascii=False, indent=2)
+        print(f"Split IDs saved to: {split_ids_path}")
+
+    return case_result
+
+
+if __name__ == "__main__":
+    category_list = [
+        "all",
+        "all_scoring",
+        "multi_turn",
+        "single_turn",
+        "live",
+        "non_live",
+        "non_python",
+        "python",
+    ]
+
+    for bfcl_category in category_list:
+        result = bfcl_task_preprocess(
+            test_categories=[bfcl_category],
+            train_ratio=0.5,
+            output_dir="./bfcl/multi_turn",
+        )
+
+        print("-" * 50)
+        print("Processing complete!")
+        if result["train"]:
+            print(f"Training samples: {len(result['train'])}")
+        if result["test"]:
+            print(f"Test samples: {len(result['test'])}")