PyPI - pycityagent - Versions diffs - 2.0.0a21__py3-none-any.whl → 2.0.0a24__py3-none-any.whl - Mend

pycityagent 2.0.0a21__py3-none-any.whl → 2.0.0a24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

pycityagent/__init__.py +2 -1
pycityagent/agent.py +18 -4
pycityagent/environment/sim/aoi_service.py +2 -1
pycityagent/environment/sim/clock_service.py +2 -1
pycityagent/environment/sim/economy_services.py +9 -8
pycityagent/environment/sim/lane_service.py +6 -5
pycityagent/environment/sim/light_service.py +10 -8
pycityagent/environment/sim/person_service.py +12 -11
pycityagent/environment/sim/road_service.py +3 -2
pycityagent/environment/sim/social_service.py +4 -3
pycityagent/environment/utils/protobuf.py +6 -4
pycityagent/llm/__init__.py +7 -2
pycityagent/llm/embeddings.py +231 -0
pycityagent/memory/__init__.py +2 -0
pycityagent/memory/faiss_query.py +302 -0
pycityagent/memory/memory.py +131 -137
pycityagent/memory/memory_base.py +7 -6
pycityagent/memory/profile.py +7 -6
pycityagent/memory/self_define.py +8 -7
pycityagent/memory/state.py +7 -6
pycityagent/memory/utils.py +2 -1
pycityagent/simulation/agentgroup.py +42 -25
pycityagent/simulation/simulation.py +9 -1
pycityagent/utils/parsers/json_parser.py +3 -3
pycityagent/workflow/block.py +2 -1
{pycityagent-2.0.0a21.dist-info → pycityagent-2.0.0a24.dist-info}/METADATA +5 -1
{pycityagent-2.0.0a21.dist-info → pycityagent-2.0.0a24.dist-info}/RECORD +28 -27
pycityagent/llm/embedding.py +0 -136
{pycityagent-2.0.0a21.dist-info → pycityagent-2.0.0a24.dist-info}/WHEEL +0 -0

pycityagent/__init__.py CHANGED Viewed

@@ -5,6 +5,7 @@ Pycityagent: 城市智能体构建框架
 from .agent import Agent, CitizenAgent, InstitutionAgent
 from .environment import Simulator
 import logging
+from .llm import SentenceEmbedding
 # 创建一个 pycityagent 记录器
 logger = logging.getLogger("pycityagent")
@@ -19,4 +20,4 @@ if not logger.hasHandlers():
     handler.setFormatter(formatter)
     logger.addHandler(handler)
-__all__ = ["Agent", "Simulator", "CitizenAgent", "InstitutionAgent"]
+__all__ = ["Agent", "Simulator", "CitizenAgent", "InstitutionAgent","SentenceEmbedding",]

pycityagent/agent.py CHANGED Viewed

@@ -236,7 +236,15 @@ class Agent(ABC):
         # 添加记忆上下文
         if self._memory:
-            relevant_memories = await self._memory.search(survey_prompt)
+            relevant_memories = await self.memory.search(survey_prompt)
+            formatted_results = []
+            # for result in top_results:
+            #     formatted_results.append(
+            #         f"- [{result['type']}] {result['content']} "
+            #         f"(相关度: {result['similarity']:.2f})"
+            #     )
             if relevant_memories:
                 dialog.append(
                     {
@@ -458,13 +466,18 @@ class Agent(ABC):
         topic = f"exps/{self._exp_id}/agents/{to_agent_uuid}/{sub_topic}"
         await self._messager.send_message(topic, payload)
-    async def send_message_to_agent(self, to_agent_uuid: str, content: str):
+    async def send_message_to_agent(
+        self, to_agent_uuid: str, content: str, type: str = "social"
+    ):
         """通过 Messager 发送消息"""
         if self._messager is None:
             raise RuntimeError("Messager is not set")
+        if type not in ["social", "economy"]:
+            logger.warning(f"Invalid message type: {type}, sent from {self._uuid}")
         payload = {
             "from": self._uuid,
             "content": content,
+            "type": type,
             "timestamp": int(datetime.now().timestamp() * 1000),
             "day": await self.simulator.get_simulator_day(),
             "t": await self.simulator.get_simulator_second_from_start_of_day(),
@@ -485,11 +498,11 @@ class Agent(ABC):
         auros.append(_message_dict)
         pg_list.append((_message_dict, _date_time))
         # Avro
-        if self._avro_file is not None:
+        if self._avro_file is not None and type == "social":
             with open(self._avro_file["dialog"], "a+b") as f:
                 fastavro.writer(f, DIALOG_SCHEMA, auros, codec="snappy")
         # Pg
-        if self._pgsql_writer is not None:
+        if self._pgsql_writer is not None and type == "social":
             if self._last_asyncio_pg_task is not None:
                 await self._last_asyncio_pg_task
             _keys = ["id", "day", "t", "type", "speaker", "content", "created_at"]
@@ -595,6 +608,7 @@ class CitizenAgent(Agent):
                 # 防止模拟器还没有到prepare阶段导致get_person出错
             self._has_bound_to_simulator = True
             self._agent_id = person_id
+            self.memory.set_agent_id(person_id)
     async def _bind_to_economy(self):
         if self._economy_client is None:

pycityagent/environment/sim/aoi_service.py CHANGED Viewed

@@ -1,4 +1,5 @@
-from typing import Any, Awaitable, Coroutine, cast, Union, Dict
+from collections.abc import Awaitable, Coroutine
+from typing import Any, Dict, Union, cast
 import grpc
 from google.protobuf.json_format import ParseDict

pycityagent/environment/sim/clock_service.py CHANGED Viewed

@@ -1,4 +1,5 @@
-from typing import Any, Awaitable, Coroutine, cast, Union, Dict
+from collections.abc import Awaitable, Coroutine
+from typing import Any, Dict, Union, cast
 import grpc
 from google.protobuf.json_format import ParseDict

pycityagent/environment/sim/economy_services.py CHANGED Viewed

@@ -1,4 +1,5 @@
-from typing import Any, Awaitable, Coroutine, cast, Union, Dict
+from typing import Any, cast, Union
+from collections.abc import Awaitable, Coroutine
 import grpc
 from google.protobuf.json_format import ParseDict
@@ -25,7 +26,7 @@ class EconomyPersonService:
         self,
         req: Union[person_service.GetPersonRequest, dict],
         dict_return: bool = True,
-    ) -> Coroutine[Any, Any, Union[Dict[str, Any], person_service.GetPersonResponse]]:
+    ) -> Coroutine[Any, Any, Union[dict[str, Any], person_service.GetPersonResponse]]:
         """
         批量查询人的经济情况（资金、雇佣关系）
         Query person’s economic situation (funds, employment relationship) in batches
@@ -48,7 +49,7 @@ class EconomyPersonService:
         req: Union[person_service.UpdatePersonMoneyRequest, dict],
         dict_return: bool = True,
     ) -> Coroutine[
-        Any, Any, Union[Dict[str, Any], person_service.UpdatePersonMoneyResponse]
+        Any, Any, Union[dict[str, Any], person_service.UpdatePersonMoneyResponse]
     ]:
         """
         批量修改人的资金
@@ -80,7 +81,7 @@ class EconomyOrgService:
     def GetOrg(
         self, req: Union[org_service.GetOrgRequest, dict], dict_return: bool = True
-    ) -> Coroutine[Any, Any, Union[Dict[str, Any], org_service.GetOrgResponse]]:
+    ) -> Coroutine[Any, Any, Union[dict[str, Any], org_service.GetOrgResponse]]:
         """
         批量查询组织的经济情况（员工、岗位、资金、货物）
         Query the economic status of the organization (employees, positions, funds, goods) in batches
@@ -100,7 +101,7 @@ class EconomyOrgService:
         self,
         req: Union[org_service.UpdateOrgMoneyRequest, dict],
         dict_return: bool = True,
-    ) -> Coroutine[Any, Any, Union[Dict[str, Any], org_service.UpdateOrgMoneyResponse]]:
+    ) -> Coroutine[Any, Any, Union[dict[str, Any], org_service.UpdateOrgMoneyResponse]]:
         """
         批量修改组织的资金
         Modify organization’s money in batches
@@ -123,7 +124,7 @@ class EconomyOrgService:
         self,
         req: Union[org_service.UpdateOrgGoodsRequest, dict],
         dict_return: bool = True,
-    ) -> Coroutine[Any, Any, Union[Dict[str, Any], org_service.UpdateOrgGoodsResponse]]:
+    ) -> Coroutine[Any, Any, Union[dict[str, Any], org_service.UpdateOrgGoodsResponse]]:
         """
         批量修改组织的货物
         Modify organization’s goods in batches
@@ -147,7 +148,7 @@ class EconomyOrgService:
         req: Union[org_service.UpdateOrgEmployeeRequest, dict],
         dict_return: bool = True,
     ) -> Coroutine[
-        Any, Any, Union[Dict[str, Any], org_service.UpdateOrgEmployeeResponse]
+        Any, Any, Union[dict[str, Any], org_service.UpdateOrgEmployeeResponse]
     ]:
         """
         批量修改组织的员工
@@ -171,7 +172,7 @@ class EconomyOrgService:
         self,
         req: Union[org_service.UpdateOrgJobRequest, dict],
         dict_return: bool = True,
-    ) -> Coroutine[Any, Any, Union[Dict[str, Any], org_service.UpdateOrgJobResponse]]:
+    ) -> Coroutine[Any, Any, Union[dict[str, Any], org_service.UpdateOrgJobResponse]]:
         """
         批量修改组织的岗位
         Modify organization’s jobs in batches

pycityagent/environment/sim/lane_service.py CHANGED Viewed

@@ -1,4 +1,5 @@
-from typing import Any, Awaitable, Coroutine, cast, Union, Dict
+from typing import Any,cast, Union
+from collections.abc import Awaitable, Coroutine
 import grpc
 from google.protobuf.json_format import ParseDict
@@ -21,7 +22,7 @@ class LaneService:
     def GetLane(
         self, req: Union[lane_service.GetLaneRequest, dict], dict_return: bool = True
-    ) -> Coroutine[Any, Any, Union[Dict[str, Any], lane_service.GetLaneResponse]]:
+    ) -> Coroutine[Any, Any, Union[dict[str, Any], lane_service.GetLaneResponse]]:
         """
         获取Lane的信息
         Get Lane's information
@@ -41,7 +42,7 @@ class LaneService:
         self,
         req: Union[lane_service.SetLaneMaxVRequest, dict],
         dict_return: bool = True,
-    ) -> Coroutine[Any, Any, Union[Dict[str, Any], lane_service.SetLaneMaxVResponse]]:
+    ) -> Coroutine[Any, Any, Union[dict[str, Any], lane_service.SetLaneMaxVResponse]]:
         """
         设置Lane的最大速度（限速）
         Set the maximum speed of Lane (speed limit)
@@ -64,7 +65,7 @@ class LaneService:
         req: Union[lane_service.SetLaneRestrictionRequest, dict],
         dict_return: bool = True,
     ) -> Coroutine[
-        Any, Any, Union[Dict[str, Any], lane_service.SetLaneRestrictionResponse]
+        Any, Any, Union[dict[str, Any], lane_service.SetLaneRestrictionResponse]
     ]:
         """
         设置Lane的限制
@@ -89,7 +90,7 @@ class LaneService:
         req: Union[lane_service.GetLaneByLongLatBBoxRequest, dict],
         dict_return: bool = True,
     ) -> Coroutine[
-        Any, Any, Union[Dict[str, Any], lane_service.GetLaneByLongLatBBoxResponse]
+        Any, Any, Union[dict[str, Any], lane_service.GetLaneByLongLatBBoxResponse]
     ]:
         """
         获取特定区域内的Lane的信息

pycityagent/environment/sim/light_service.py CHANGED Viewed

@@ -1,9 +1,11 @@
-from typing import Any, Awaitable, Coroutine, cast, Union, Dict
+from collections.abc import Awaitable, Coroutine
+from typing import Any, Union, cast
 import grpc
 from google.protobuf.json_format import ParseDict
 from pycityproto.city.map.v2 import traffic_light_service_pb2 as light_service
-from pycityproto.city.map.v2 import traffic_light_service_pb2_grpc as light_grpc
+from pycityproto.city.map.v2 import \
+    traffic_light_service_pb2_grpc as light_grpc
 from ..utils.protobuf import async_parse
@@ -21,10 +23,10 @@ class LightService:
     def GetTrafficLight(
         self,
-        req: Union[light_service.GetTrafficLightRequest, Dict[str, Any]],
+        req: Union[light_service.GetTrafficLightRequest, dict[str, Any]],
         dict_return: bool = True,
     ) -> Coroutine[
-        Any, Any, Union[Dict[str, Any], light_service.GetTrafficLightResponse]
+        Any, Any, Union[dict[str, Any], light_service.GetTrafficLightResponse]
     ]:
         """
         获取路口的红绿灯信息
@@ -46,10 +48,10 @@ class LightService:
     def SetTrafficLight(
         self,
-        req: Union[light_service.SetTrafficLightRequest, Dict[str, Any]],
+        req: Union[light_service.SetTrafficLightRequest, dict[str, Any]],
         dict_return: bool = True,
     ) -> Coroutine[
-        Any, Any, Union[Dict[str, Any], light_service.SetTrafficLightResponse]
+        Any, Any, Union[dict[str, Any], light_service.SetTrafficLightResponse]
     ]:
         """
         设置路口的红绿灯信息
@@ -74,7 +76,7 @@ class LightService:
         req: Union[light_service.SetTrafficLightPhaseRequest, dict],
         dict_return: bool = True,
     ) -> Coroutine[
-        Any, Any, Union[Dict[str, Any], light_service.SetTrafficLightPhaseResponse]
+        Any, Any, Union[dict[str, Any], light_service.SetTrafficLightPhaseResponse]
     ]:
         """
         设置路口的红绿灯相位
@@ -99,7 +101,7 @@ class LightService:
         req: Union[light_service.SetTrafficLightStatusRequest, dict],
         dict_return: bool = True,
     ) -> Coroutine[
-        Any, Any, Union[Dict[str, Any], light_service.SetTrafficLightStatusResponse]
+        Any, Any, Union[dict[str, Any], light_service.SetTrafficLightStatusResponse]
     ]:
         """
         设置路口的红绿灯状态

pycityagent/environment/sim/person_service.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import warnings
-from typing import Any, Awaitable, Coroutine, Dict, Union, cast
+from collections.abc import Awaitable, Coroutine
+from typing import Any, Union, cast
 import grpc
 from google.protobuf.json_format import ParseDict
@@ -51,7 +52,7 @@ class PersonService:
         self,
         req: Union[person_service.GetPersonRequest, dict],
         dict_return: bool = True,
-    ) -> Coroutine[Any, Any, Union[Dict[str, Any], person_service.GetPersonResponse]]:
+    ) -> Coroutine[Any, Any, Union[dict[str, Any], person_service.GetPersonResponse]]:
         """
         获取person信息
         Get person information
@@ -73,7 +74,7 @@ class PersonService:
         self,
         req: Union[person_service.AddPersonRequest, dict],
         dict_return: bool = True,
-    ) -> Coroutine[Any, Any, Union[Dict[str, Any], person_service.AddPersonResponse]]:
+    ) -> Coroutine[Any, Any, Union[dict[str, Any], person_service.AddPersonResponse]]:
         """
         新增person
         Add a new person
@@ -95,7 +96,7 @@ class PersonService:
         self,
         req: Union[person_service.SetScheduleRequest, dict],
         dict_return: bool = True,
-    ) -> Coroutine[Any, Any, Union[Dict[str, Any], person_service.SetScheduleResponse]]:
+    ) -> Coroutine[Any, Any, Union[dict[str, Any], person_service.SetScheduleResponse]]:
         """
         修改person的schedule
         set person's schedule
@@ -118,7 +119,7 @@ class PersonService:
         self,
         req: Union[person_service.GetPersonsRequest, dict],
         dict_return: bool = True,
-    ) -> Coroutine[Any, Any, Union[Dict[str, Any], person_service.GetPersonsResponse]]:
+    ) -> Coroutine[Any, Any, Union[dict[str, Any], person_service.GetPersonsResponse]]:
         """
         获取多个person信息
         Get information of multiple persons
@@ -142,7 +143,7 @@ class PersonService:
         req: Union[person_service.GetPersonByLongLatBBoxRequest, dict],
         dict_return: bool = True,
     ) -> Coroutine[
-        Any, Any, Union[Dict[str, Any], person_service.GetPersonByLongLatBBoxResponse]
+        Any, Any, Union[dict[str, Any], person_service.GetPersonByLongLatBBoxResponse]
     ]:
         """
         获取特定区域内的person
@@ -167,7 +168,7 @@ class PersonService:
         req: Union[person_service.GetAllVehiclesRequest, dict],
         dict_return: bool = True,
     ) -> Coroutine[
-        Any, Any, Union[Dict[str, Any], person_service.GetAllVehiclesResponse]
+        Any, Any, Union[dict[str, Any], person_service.GetAllVehiclesResponse]
     ]:
         """
         获取所有车辆
@@ -192,7 +193,7 @@ class PersonService:
         req: Union[person_service.ResetPersonPositionRequest, dict],
         dict_return: bool = True,
     ) -> Coroutine[
-        Any, Any, Union[Dict[str, Any], person_service.ResetPersonPositionResponse]
+        Any, Any, Union[dict[str, Any], person_service.ResetPersonPositionResponse]
     ]:
         """
         重置人的位置（将停止当前正在进行的出行，转为sleep状态）
@@ -219,7 +220,7 @@ class PersonService:
         req: Union[person_service.SetControlledVehicleIDsRequest, dict],
         dict_return: bool = True,
     ) -> Coroutine[
-        Any, Any, Union[Dict[str, Any], person_service.SetControlledVehicleIDsResponse]
+        Any, Any, Union[dict[str, Any], person_service.SetControlledVehicleIDsResponse]
     ]:
         """
         设置由外部控制行为的vehicle
@@ -246,7 +247,7 @@ class PersonService:
     ) -> Coroutine[
         Any,
         Any,
-        Union[Dict[str, Any], person_service.FetchControlledVehicleEnvsResponse],
+        Union[dict[str, Any], person_service.FetchControlledVehicleEnvsResponse],
     ]:
         """
         获取由外部控制行为的vehicle的环境信息
@@ -273,7 +274,7 @@ class PersonService:
     ) -> Coroutine[
         Any,
         Any,
-        Union[Dict[str, Any], person_service.SetControlledVehicleActionsResponse],
+        Union[dict[str, Any], person_service.SetControlledVehicleActionsResponse],
     ]:
         """
         设置由外部控制行为的vehicle的行为

pycityagent/environment/sim/road_service.py CHANGED Viewed

@@ -1,4 +1,5 @@
-from typing import Any, Awaitable, Coroutine, cast, Union, Dict
+from collections.abc import Awaitable, Coroutine
+from typing import Any, Union, cast
 import grpc
 from google.protobuf.json_format import ParseDict
@@ -21,7 +22,7 @@ class RoadService:
     def GetRoad(
         self, req: Union[road_service.GetRoadRequest, dict], dict_return: bool = True
-    ) -> Coroutine[Any, Any, Union[Dict[str, Any], road_service.GetRoadResponse]]:
+    ) -> Coroutine[Any, Any, Union[dict[str, Any], road_service.GetRoadResponse]]:
         """
         查询道路信息
         Query road information

pycityagent/environment/sim/social_service.py CHANGED Viewed

@@ -1,4 +1,5 @@
-from typing import Any, Awaitable, Coroutine, cast, Union, Dict
+from collections.abc import Awaitable, Coroutine
+from typing import Any, Union, cast
 import grpc
 from google.protobuf.json_format import ParseDict
@@ -21,7 +22,7 @@ class SocialService:
     def Send(
         self, req: Union[social_service.SendRequest, dict], dict_return: bool = True
-    ) -> Coroutine[Any, Any, Union[Dict[str, Any], social_service.SendResponse]]:
+    ) -> Coroutine[Any, Any, Union[dict[str, Any], social_service.SendResponse]]:
         """
         发送消息
         Send message
@@ -39,7 +40,7 @@ class SocialService:
     def Receive(
         self, req: Union[social_service.ReceiveRequest, dict], dict_return: bool = True
-    ) -> Coroutine[Any, Any, Union[Dict[str, Any], social_service.ReceiveResponse]]:
+    ) -> Coroutine[Any, Any, Union[dict[str, Any], social_service.ReceiveResponse]]:
         """
         接收消息
         Receive message

pycityagent/environment/utils/protobuf.py CHANGED Viewed

@@ -1,13 +1,15 @@
-from typing import Any, Awaitable, TypeVar, Union, Dict
-from google.protobuf.message import Message
+from collections.abc import Awaitable
+from typing import Any, TypeVar, Union
 from google.protobuf.json_format import MessageToDict
+from google.protobuf.message import Message
 __all__ = ["parse", "async_parse"]
 T = TypeVar("T", bound=Message)
-def parse(res: T, dict_return: bool) -> Union[Dict[str, Any], T]:
+def parse(res: T, dict_return: bool) -> Union[dict[str, Any], T]:
     """
     将Protobuf返回值转换为dict或者原始值
     Convert Protobuf return value to dict or original value
@@ -23,7 +25,7 @@ def parse(res: T, dict_return: bool) -> Union[Dict[str, Any], T]:
         return res
-async def async_parse(res: Awaitable[T], dict_return: bool) -> Union[Dict[str, Any], T]:
+async def async_parse(res: Awaitable[T], dict_return: bool) -> Union[dict[str, Any], T]:
     """
     将Protobuf await返回值转换为dict或者原始值
     Convert Protobuf await return value to dict or original value

pycityagent/llm/__init__.py CHANGED Viewed

@@ -1,6 +1,11 @@
 """LLM相关模块"""
+from .embeddings import SentenceEmbedding, SimpleEmbedding
 from .llm import LLM, LLMConfig
-from .embedding import SimpleEmbedding
-__all__ = ["LLM", "LLMConfig", "SimpleEmbedding"]
+__all__ = [
+    "LLM",
+    "LLMConfig",
+    "SentenceEmbedding",
+    "SimpleEmbedding",
+]

pycityagent/llm/embeddings.py ADDED Viewed

@@ -0,0 +1,231 @@
+import hashlib
+import json
+import os
+from typing import Optional, Union
+import numpy as np
+import torch
+from langchain_core.embeddings import Embeddings
+from transformers import AutoModel, AutoTokenizer
+__all__ = [
+    "SentenceEmbedding",
+    "SimpleEmbedding",
+]
+class SentenceEmbedding(Embeddings):
+    def __init__(
+        self,
+        pretrained_model_name_or_path: Union[str, os.PathLike] = "BAAI/bge-m3",
+        max_seq_len: int = 8192,
+        auto_cuda: bool = False,
+        local_files_only: bool = False,
+        cache_dir: str = "./cache",
+        proxies: Optional[dict] = None,
+    ):
+        os.makedirs(cache_dir, exist_ok=True)
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            pretrained_model_name_or_path,
+            proxies=proxies,
+            cache_dir=cache_dir,
+            local_files_only=local_files_only,
+        )
+        self.model = AutoModel.from_pretrained(
+            pretrained_model_name_or_path,
+            proxies=proxies,
+            cache_dir=cache_dir,
+            local_files_only=local_files_only,
+        )
+        self._cuda = auto_cuda and torch.cuda.is_available()
+        if self._cuda:
+            self.model = self.model.cuda()
+        self.model.eval()
+        self.max_seq_len = max_seq_len
+    def _embed(self, texts: list[str]) -> list[list[float]]:
+        # Tokenize sentences
+        encoded_input = self.tokenizer(
+            texts, padding=True, truncation=True, return_tensors="pt"
+        )
+        # for s2p(short query to long passage) retrieval task, add an instruction to query (not add instruction for passages)
+        # encoded_input = tokenizer([instruction + q for q in queries], padding=True, truncation=True, return_tensors='pt')
+        # check length of input
+        # assert seq_len <= 8192
+        assert encoded_input["input_ids"].shape[1] <= self.max_seq_len  # type: ignore
+        if self._cuda:
+            encoded_input = {k: v.cuda() for k, v in encoded_input.items()}
+        # Compute token embeddings
+        with torch.no_grad():
+            model_output = self.model(**encoded_input)
+            # Perform pooling. In this case, cls pooling.
+            sentence_embeddings = model_output[0][:, 0]
+        # normalize embeddings
+        sentence_embeddings = torch.nn.functional.normalize(
+            sentence_embeddings, p=2, dim=1
+        )
+        if self._cuda:
+            sentence_embeddings = sentence_embeddings.cpu()
+        return sentence_embeddings.tolist()
+    def embed_documents(self, texts: list[str]) -> list[list[float]]:
+        """Embed documents."""
+        return self._embed(texts)
+    def embed_query(self, text: str) -> list[float]:
+        """Embed query text."""
+        return self._embed([text])[0]
+class SimpleEmbedding(Embeddings):
+    """简单的基于内存的embedding实现
+    使用简单的词袋模型(Bag of Words)和TF-IDF来生成文本的向量表示。
+    所有向量都保存在内存中，适用于小规模应用。
+    """
+    def __init__(self, vector_dim: int = 128, cache_size: int = 1000):
+        """初始化
+        Args:
+            vector_dim: 向量维度
+            cache_size: 缓存大小，超过此大小将清除最早的缓存
+        """
+        self.vector_dim = vector_dim
+        self.cache_size = cache_size
+        self._cache: dict[str, list[float]] = {}
+        self._vocab: dict[str, int] = {}  # 词汇表
+        self._idf: dict[str, float] = {}  # 逆文档频率
+        self._doc_count = 0  # 文档总数
+    def _text_to_hash(self, text: str) -> str:
+        """将文本转换为hash值"""
+        return hashlib.md5(text.encode()).hexdigest()
+    def _tokenize(self, text: str) -> list[str]:
+        """简单的分词"""
+        # 这里使用简单的空格分词，实际应用中可以使用更复杂的分词方法
+        return text.lower().split()
+    def _update_vocab(self, tokens: list[str]):
+        """更新词汇表"""
+        for token in set(tokens):  # 使用set去重
+            if token not in self._vocab:
+                self._vocab[token] = len(self._vocab)
+    def _update_idf(self, tokens: list[str]):
+        """更新IDF值"""
+        self._doc_count += 1
+        unique_tokens = set(tokens)
+        for token in unique_tokens:
+            self._idf[token] = self._idf.get(token, 0) + 1
+    def _calculate_tf(self, tokens: list[str]) -> dict[str, float]:
+        """计算词频(TF)"""
+        tf = {}
+        total_tokens = len(tokens)
+        for token in tokens:
+            tf[token] = tf.get(token, 0) + 1
+        # 归一化
+        for token in tf:
+            tf[token] /= total_tokens
+        return tf
+    def _calculate_tfidf(self, tokens: list[str]) -> list[float]:
+        """计算TF-IDF向量"""
+        vector = np.zeros(self.vector_dim)
+        tf = self._calculate_tf(tokens)
+        for token, tf_value in tf.items():
+            if token in self._idf:
+                idf = np.log(self._doc_count / self._idf[token])
+                idx = self._vocab[token] % self.vector_dim  # 使用取模运算来控制向量维度
+                vector[idx] += tf_value * idf
+        # L2归一化
+        norm = np.linalg.norm(vector)
+        if norm > 0:
+            vector /= norm
+        return list(vector)
+    def _embed(self, text: str) -> list[float]:
+        """生成文本的向量表示
+        Args:
+            text: 输入文本
+        Returns:
+            np.ndarray: 文本的向量表示
+        """
+        # 检查缓存
+        text_hash = self._text_to_hash(text)
+        if text_hash in self._cache:
+            return self._cache[text_hash]
+        # 分词
+        tokens = self._tokenize(text)
+        if not tokens:
+            return list(np.zeros(self.vector_dim))
+        # 更新词汇表和IDF
+        self._update_vocab(tokens)
+        self._update_idf(tokens)
+        # 计算向量
+        vector = self._calculate_tfidf(tokens)
+        # 更新缓存
+        if len(self._cache) >= self.cache_size:
+            # 删除最早的缓存
+            oldest_key = next(iter(self._cache))
+            del self._cache[oldest_key]
+        self._cache[text_hash] = vector
+        return list(vector)
+    def embed_documents(self, texts: list[str]) -> list[list[float]]:
+        """Embed documents."""
+        return [self._embed(text) for text in texts]
+    def embed_query(self, text: str) -> list[float]:
+        """Embed query text."""
+        return self._embed(text)
+    # def save(self, file_path: str):
+    #     """保存模型"""
+    #     state = {
+    #         "vector_dim": self.vector_dim,
+    #         "cache_size": self.cache_size,
+    #         "vocab": self._vocab,
+    #         "idf": self._idf,
+    #         "doc_count": self._doc_count,
+    #     }
+    #     with open(file_path, "w") as f:
+    #         json.dump(state, f)
+    # def load(self, file_path: str):
+    #     """加载模型"""
+    #     with open(file_path, "r") as f:
+    #         state = json.load(f)
+    #     self.vector_dim = state["vector_dim"]
+    #     self.cache_size = state["cache_size"]
+    #     self._vocab = state["vocab"]
+    #     self._idf = state["idf"]
+    #     self._doc_count = state["doc_count"]
+    #     self._cache = {}  # 清空缓存
+if __name__ == "__main__":
+    # se = SentenceEmbedding(
+    #     pretrained_model_name_or_path="ignore/BAAI--bge-m3", cache_dir="ignore"
+    # )
+    se = SimpleEmbedding()
+    print(se.embed_query("hello world"))
+    print(se.embed_query("hello world"))
+    print(se.embed_query("hello world"))
+    print(se.embed_query("hello world"))

pycityagent 2.0.0a21__py3-none-any.whl → 2.0.0a24__py3-none-any.whl

pycityagent 2.0.0a21__py3-none-any.whl → 2.0.0a24__py3-none-any.whl