pycityagent 2.0.0a22__py3-none-any.whl → 2.0.0a25__py3-none-any.whl

This diff compares publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
pycityagent/__init__.py CHANGED
@@ -5,6 +5,7 @@ Pycityagent: a framework for building urban agents
  from .agent import Agent, CitizenAgent, InstitutionAgent
  from .environment import Simulator
  import logging
+ from .llm import SentenceEmbedding
 
  # create a pycityagent logger
  logger = logging.getLogger("pycityagent")
@@ -19,4 +20,4 @@ if not logger.hasHandlers():
  handler.setFormatter(formatter)
  logger.addHandler(handler)
 
- __all__ = ["Agent", "Simulator", "CitizenAgent", "InstitutionAgent"]
+ __all__ = ["Agent", "Simulator", "CitizenAgent", "InstitutionAgent", "SentenceEmbedding"]
pycityagent/agent.py CHANGED
@@ -236,7 +236,15 @@ class Agent(ABC):
 
          # add memory context
          if self._memory:
-             relevant_memories = await self._memory.search(survey_prompt)
+             relevant_memories = await self.memory.search(survey_prompt)
+
+             formatted_results = []
+             # for result in top_results:
+             #     formatted_results.append(
+             #         f"- [{result['type']}] {result['content']} "
+             #         f"(relevance: {result['similarity']:.2f})"
+             #     )
+
          if relevant_memories:
              dialog.append(
                  {
@@ -458,7 +466,9 @@ class Agent(ABC):
          topic = f"exps/{self._exp_id}/agents/{to_agent_uuid}/{sub_topic}"
          await self._messager.send_message(topic, payload)
 
-     async def send_message_to_agent(self, to_agent_uuid: str, content: str, type: str = "social"):
+     async def send_message_to_agent(
+         self, to_agent_uuid: str, content: str, type: str = "social"
+     ):
          """Send a message through the Messager"""
          if self._messager is None:
              raise RuntimeError("Messager is not set")
@@ -598,6 +608,7 @@ class CitizenAgent(Agent):
          # prevent get_person errors when the simulator has not yet reached the prepare stage
          self._has_bound_to_simulator = True
          self._agent_id = person_id
+         self.memory.set_agent_id(person_id)
 
      async def _bind_to_economy(self):
          if self._economy_client is None:
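The `send_message_to_agent` change above is formatting only; the call contract is unchanged. A hedged usage sketch from inside an agent coroutine (the UUID is a placeholder):

```python
# Inside an Agent subclass method; assumes a Messager has been attached,
# otherwise the method raises RuntimeError("Messager is not set").
await self.send_message_to_agent(
    to_agent_uuid="0000-placeholder-uuid",  # hypothetical receiver id
    content="hello neighbor",
    type="social",                          # default message type
)
```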
@@ -3,7 +3,6 @@
  import asyncio
  import logging
  import os
- from collections.abc import Sequence
  from datetime import datetime, timedelta
  from typing import Any, Optional, Union, cast
 
@@ -22,13 +21,14 @@ from .utils.const import *
 
  logger = logging.getLogger("pycityagent")
 
+
  class Simulator:
      """
      - Simulator main class
      - Simulator Class
      """
 
-     def __init__(self, config:dict, secure: bool = False) -> None:
+     def __init__(self, config: dict, secure: bool = False) -> None:
          self.config = config
          """
          - Simulator configuration
@@ -193,7 +193,7 @@ class Simulator:
          else:
              # BUG: the returned time is a float
              return t_sec["t"]
-
+
      async def get_simulator_day(self) -> int:
          """
          Get the day index the simulator has reached
@@ -202,7 +202,7 @@ class Simulator:
          t_sec = cast(dict[str, int], t_sec)
          day = t_sec["t"] // 86400
          return day
-
+
      async def get_simulator_second_from_start_of_day(self) -> int:
          """
          Get the seconds elapsed since 00:00:00 of the current simulated day
@@ -316,7 +316,7 @@ class Simulator:
          radius: float,
          poi_type: Union[str, list[str]],
      ):
-         if not isinstance(poi_type, Sequence):
+         if isinstance(poi_type, str):
              poi_type = [poi_type]
          transformed_poi_type = []
          for t in poi_type:
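The last hunk fixes a real bug rather than style: `str` is itself a `Sequence`, so the old guard never wrapped a bare string, and the loop below would iterate over its characters. A minimal illustration:

```python
from collections.abc import Sequence

poi_type = "bus_stop"
isinstance(poi_type, Sequence)  # True, so `not isinstance(...)` never wrapped it
# old behavior: `for t in poi_type` iterated "b", "u", "s", ...
if isinstance(poi_type, str):   # new guard targets exactly the single-string case
    poi_type = [poi_type]       # ["bus_stop"]
```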
@@ -1,6 +1,11 @@
  """LLM-related modules"""
 
+ from .embeddings import SentenceEmbedding, SimpleEmbedding
  from .llm import LLM, LLMConfig
- from .embedding import SimpleEmbedding
 
- __all__ = ["LLM", "LLMConfig", "SimpleEmbedding"]
+ __all__ = [
+     "LLM",
+     "LLMConfig",
+     "SentenceEmbedding",
+     "SimpleEmbedding",
+ ]
@@ -0,0 +1,231 @@
+ import hashlib
+ import json
+ import os
+ from typing import Optional, Union
+
+ import numpy as np
+ import torch
+ from langchain_core.embeddings import Embeddings
+ from transformers import AutoModel, AutoTokenizer
+
+ __all__ = [
+     "SentenceEmbedding",
+     "SimpleEmbedding",
+ ]
+
+
+ class SentenceEmbedding(Embeddings):
+     def __init__(
+         self,
+         pretrained_model_name_or_path: Union[str, os.PathLike] = "BAAI/bge-m3",
+         max_seq_len: int = 8192,
+         auto_cuda: bool = False,
+         local_files_only: bool = False,
+         cache_dir: str = "./cache",
+         proxies: Optional[dict] = None,
+     ):
+         os.makedirs(cache_dir, exist_ok=True)
+         self.tokenizer = AutoTokenizer.from_pretrained(
+             pretrained_model_name_or_path,
+             proxies=proxies,
+             cache_dir=cache_dir,
+             local_files_only=local_files_only,
+         )
+         self.model = AutoModel.from_pretrained(
+             pretrained_model_name_or_path,
+             proxies=proxies,
+             cache_dir=cache_dir,
+             local_files_only=local_files_only,
+         )
+         self._cuda = auto_cuda and torch.cuda.is_available()
+
+         if self._cuda:
+             self.model = self.model.cuda()
+
+         self.model.eval()
+         self.max_seq_len = max_seq_len
+
+     def _embed(self, texts: list[str]) -> list[list[float]]:
+         # Tokenize sentences
+         encoded_input = self.tokenizer(
+             texts, padding=True, truncation=True, return_tensors="pt"
+         )
+         # for s2p (short query to long passage) retrieval tasks, add an instruction to the query (no instruction for passages)
+         # encoded_input = tokenizer([instruction + q for q in queries], padding=True, truncation=True, return_tensors='pt')
+
+         # check length of input
+         # assert seq_len <= 8192
+         assert encoded_input["input_ids"].shape[1] <= self.max_seq_len  # type: ignore
+
+         if self._cuda:
+             encoded_input = {k: v.cuda() for k, v in encoded_input.items()}
+         # Compute token embeddings
+         with torch.no_grad():
+             model_output = self.model(**encoded_input)
+             # Perform pooling. In this case, cls pooling.
+             sentence_embeddings = model_output[0][:, 0]
+         # normalize embeddings
+         sentence_embeddings = torch.nn.functional.normalize(
+             sentence_embeddings, p=2, dim=1
+         )
+         if self._cuda:
+             sentence_embeddings = sentence_embeddings.cpu()
+         return sentence_embeddings.tolist()
+
+     def embed_documents(self, texts: list[str]) -> list[list[float]]:
+         """Embed documents."""
+         return self._embed(texts)
+
+     def embed_query(self, text: str) -> list[float]:
+         """Embed query text."""
+         return self._embed([text])[0]
+
+
+ class SimpleEmbedding(Embeddings):
+     """A simple in-memory embedding implementation.
+
+     Uses a bag-of-words model with TF-IDF weighting to build vector
+     representations of text. All vectors are kept in memory, which suits
+     small-scale applications.
+     """
+
+     def __init__(self, vector_dim: int = 128, cache_size: int = 1000):
+         """Initialize.
+
+         Args:
+             vector_dim: dimensionality of the output vectors
+             cache_size: cache capacity; once exceeded, the oldest entry is evicted
+         """
+         self.vector_dim = vector_dim
+         self.cache_size = cache_size
+         self._cache: dict[str, list[float]] = {}
+         self._vocab: dict[str, int] = {}  # vocabulary
+         self._idf: dict[str, float] = {}  # inverse document frequency
+         self._doc_count = 0  # total number of documents
+
+     def _text_to_hash(self, text: str) -> str:
+         """Convert text to a hash value."""
+         return hashlib.md5(text.encode()).hexdigest()
+
+     def _tokenize(self, text: str) -> list[str]:
+         """Simple tokenization."""
+         # plain whitespace tokenization; real applications may want a more sophisticated tokenizer
+         return text.lower().split()
+
+     def _update_vocab(self, tokens: list[str]):
+         """Update the vocabulary."""
+         for token in set(tokens):  # deduplicate with a set
+             if token not in self._vocab:
+                 self._vocab[token] = len(self._vocab)
+
+     def _update_idf(self, tokens: list[str]):
+         """Update IDF values."""
+         self._doc_count += 1
+         unique_tokens = set(tokens)
+         for token in unique_tokens:
+             self._idf[token] = self._idf.get(token, 0) + 1
+
+     def _calculate_tf(self, tokens: list[str]) -> dict[str, float]:
+         """Compute term frequency (TF)."""
+         tf = {}
+         total_tokens = len(tokens)
+         for token in tokens:
+             tf[token] = tf.get(token, 0) + 1
+         # normalize
+         for token in tf:
+             tf[token] /= total_tokens
+         return tf
+
+     def _calculate_tfidf(self, tokens: list[str]) -> list[float]:
+         """Compute the TF-IDF vector."""
+         vector = np.zeros(self.vector_dim)
+         tf = self._calculate_tf(tokens)
+
+         for token, tf_value in tf.items():
+             if token in self._idf:
+                 idf = np.log(self._doc_count / self._idf[token])
+                 idx = self._vocab[token] % self.vector_dim  # use modulo to bound the index by the vector dimension
+                 vector[idx] += tf_value * idf
+
+         # L2 normalization
+         norm = np.linalg.norm(vector)
+         if norm > 0:
+             vector /= norm
+
+         return list(vector)
+
+     def _embed(self, text: str) -> list[float]:
+         """Generate the vector representation of a text.
+
+         Args:
+             text: input text
+
+         Returns:
+             list[float]: the text's vector representation
+         """
+         # check the cache
+         text_hash = self._text_to_hash(text)
+         if text_hash in self._cache:
+             return self._cache[text_hash]
+
+         # tokenize
+         tokens = self._tokenize(text)
+         if not tokens:
+             return list(np.zeros(self.vector_dim))
+
+         # update vocabulary and IDF
+         self._update_vocab(tokens)
+         self._update_idf(tokens)
+
+         # compute the vector
+         vector = self._calculate_tfidf(tokens)
+
+         # update the cache
+         if len(self._cache) >= self.cache_size:
+             # evict the oldest cache entry
+             oldest_key = next(iter(self._cache))
+             del self._cache[oldest_key]
+         self._cache[text_hash] = vector
+
+         return list(vector)
+
+     def embed_documents(self, texts: list[str]) -> list[list[float]]:
+         """Embed documents."""
+         return [self._embed(text) for text in texts]
+
+     def embed_query(self, text: str) -> list[float]:
+         """Embed query text."""
+         return self._embed(text)
+
+     # def save(self, file_path: str):
+     #     """Save the model state."""
+     #     state = {
+     #         "vector_dim": self.vector_dim,
+     #         "cache_size": self.cache_size,
+     #         "vocab": self._vocab,
+     #         "idf": self._idf,
+     #         "doc_count": self._doc_count,
+     #     }
+     #     with open(file_path, "w") as f:
+     #         json.dump(state, f)
+
+     # def load(self, file_path: str):
+     #     """Load the model state."""
+     #     with open(file_path, "r") as f:
+     #         state = json.load(f)
+     #     self.vector_dim = state["vector_dim"]
+     #     self.cache_size = state["cache_size"]
+     #     self._vocab = state["vocab"]
+     #     self._idf = state["idf"]
+     #     self._doc_count = state["doc_count"]
+     #     self._cache = {}  # clear the cache
+
+
+ if __name__ == "__main__":
+     # se = SentenceEmbedding(
+     #     pretrained_model_name_or_path="ignore/BAAI--bge-m3", cache_dir="ignore"
+     # )
+     se = SimpleEmbedding()
+     print(se.embed_query("hello world"))
+     print(se.embed_query("hello world"))
+     print(se.embed_query("hello world"))
+     print(se.embed_query("hello world"))
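Both classes implement LangChain's `Embeddings` interface, so they are interchangeable wherever an embedding function is expected. A hedged usage sketch (loading `BAAI/bge-m3` downloads the model on first use unless `local_files_only=True`):

```python
from pycityagent.llm import SentenceEmbedding, SimpleEmbedding

# In-memory TF-IDF embedding; repeated queries hit the MD5-keyed cache,
# so identical texts return identical vectors.
simple = SimpleEmbedding(vector_dim=128)
doc_vecs = simple.embed_documents(["hello world", "goodbye world"])
query_vec = simple.embed_query("hello world")  # 128-dim list[float]

# Transformer-based embedding; CLS-pooled and L2-normalized.
# se = SentenceEmbedding(auto_cuda=True, cache_dir="./cache")
# se.embed_query("hello world")
```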
@@ -1,5 +1,6 @@
  """Memory."""
 
+ from .faiss_query import FaissQuery
  from .memory import Memory
  from .memory_base import MemoryBase, MemoryUnit
  from .profile import ProfileMemory, ProfileMemoryUnit
@@ -8,4 +9,5 @@ from .state import StateMemory
 
  __all__ = [
      "Memory",
+     "FaissQuery",
  ]
@@ -0,0 +1,302 @@
+ import asyncio
+ from collections.abc import Sequence
+ from typing import Any, Literal, Optional, Union
+
+ import faiss
+ import numpy as np
+ from langchain_community.docstore.in_memory import InMemoryDocstore
+ from langchain_community.vectorstores import FAISS
+ from langchain_core.documents import Document
+ from langchain_core.embeddings import Embeddings
+
+ from ..utils.decorators import lock_decorator
+
+
+ class FaissQuery:
+     def __init__(
+         self,
+         embeddings: Optional[Embeddings] = None,
+         index_type: Any = faiss.IndexFlatL2,
+         dimension: Optional[int] = None,
+     ) -> None:
+         self._embeddings = embeddings
+         self._lock = asyncio.Lock()
+         if embeddings is None:
+             self._index = None
+             self._vectors_store = None
+         else:
+             if dimension is None:
+                 dimension = len(embeddings.embed_query("hello world"))
+             self._index = index_type(dimension)
+             self._vectors_store = FAISS(
+                 embedding_function=embeddings,
+                 index=self._index,
+                 docstore=InMemoryDocstore(),
+                 index_to_docstore_id={},
+             )
+
+     @property
+     def embeddings(
+         self,
+     ) -> Embeddings:
+         if self._embeddings is None:
+             raise RuntimeError("No embedding set, please `set_embeddings` first!")
+         return self._embeddings
+
+     @property
+     def vectors_store(
+         self,
+     ) -> FAISS:
+         if self._vectors_store is None:
+             raise RuntimeError("No embedding set, thus no vector store initialized!")
+         return self._vectors_store
+
+     @lock_decorator
+     async def add_documents(
+         self,
+         agent_id: int,
+         documents: Union[str, Sequence[str]],
+         extra_tags: Optional[dict] = None,
+     ) -> list[str]:
+         if isinstance(documents, str):
+             documents = [documents]
+         _metadata = {"_id": agent_id}
+         if extra_tags is not None:
+             _metadata.update(extra_tags)
+         to_add_documents = [
+             Document(page_content=doc, metadata=_metadata) for doc in documents
+         ]
+         return await self.vectors_store.aadd_documents(
+             documents=to_add_documents,
+         )
+
+     @lock_decorator
+     async def delete_documents(
+         self,
+         to_delete_ids: list[str],
+     ):
+         await self.vectors_store.adelete(
+             ids=to_delete_ids,
+         )
+
+     @lock_decorator
+     async def similarity_search(
+         self,
+         query: str,
+         agent_id: int,
+         k: int = 4,
+         fetch_k: int = 20,
+         return_score_type: Union[
+             Literal["none"], Literal["similarity_score"], Literal["L2-distance"]
+         ] = "none",
+         filter: Optional[dict] = None,
+     ) -> Union[list[tuple[str, dict]], list[tuple[str, float, dict]]]:
+         """
+         Return the content most similar to the given query.
+
+         Args:
+             query (str): The text to look up documents similar to.
+             agent_id (int): The identifier of the agent whose documents to search. Only documents associated with this agent are considered.
+             k (int, optional): The number of top similar contents to return. Defaults to 4.
+             fetch_k (int, optional): The number of documents to fetch before applying any filters. Defaults to 20.
+             return_score_type (Union[Literal["none"], Literal["similarity_score"], Literal["L2-distance"]], optional):
+                 Specifies whether and how to return similarity scores with the results:
+                 - "none": Do not return scores; only return the contents (default).
+                 - "similarity_score": Return a tuple of content and its similarity score.
+                 - "L2-distance": Return a tuple of content and its L2 distance from the query.
+             filter (dict, optional): The filter dict for metadata.
+
+         Returns:
+             Union[list[tuple[str, dict]], list[tuple[str, float, dict]]]:
+                 Depending on `return_score_type`, either a list of (content, metadata)
+                 tuples or a list of (content, score, metadata) tuples.
+         """
+         _filter = {
+             "_id": agent_id,
+         }
+         if filter is not None:
+             _filter.update(filter)
+         if return_score_type == "L2-distance":
+             _result = await self.vectors_store.asimilarity_search_with_score(
+                 query=query,
+                 k=k,
+                 filter=_filter,
+                 fetch_k=fetch_k,
+             )
+             return [(r.page_content, s, r.metadata) for r, s in _result]
+         elif return_score_type == "none":
+             _result = await self.vectors_store.asimilarity_search(
+                 query=query,
+                 k=k,
+                 filter=_filter,
+                 fetch_k=fetch_k,
+             )
+             return [(r.page_content, r.metadata) for r in _result]
+         elif return_score_type == "similarity_score":
+             _result = await self.vectors_store.asimilarity_search_with_relevance_scores(
+                 query=query,
+                 k=k,
+                 filter=_filter,
+                 fetch_k=fetch_k,
+             )
+             return [(r.page_content, s, r.metadata) for r, s in _result]
+         else:
+             raise ValueError(f"Invalid `return_score_type` {return_score_type}!")
+
+     @lock_decorator
+     async def similarity_search_by_embedding(
+         self,
+         embedding: list[float],
+         agent_id: int,
+         k: int = 4,
+         fetch_k: int = 20,
+         return_score_type: Union[Literal["none"], Literal["L2-distance"]] = "none",
+         filter: Optional[dict] = None,
+     ) -> Union[list[tuple[str, dict]], list[tuple[str, float, dict]]]:
+         """
+         Return the content most similar to the given embedding.
+
+         Args:
+             embedding (list[float]): The vector to look up documents similar to.
+             agent_id (int): The identifier of the agent whose documents to search. Only documents associated with this agent are considered.
+             k (int, optional): The number of top similar contents to return. Defaults to 4.
+             fetch_k (int, optional): The number of documents to fetch before applying any filters. Defaults to 20.
+             return_score_type (Union[Literal["none"], Literal["L2-distance"]], optional):
+                 Specifies whether and how to return similarity scores with the results:
+                 - "none": Do not return scores; only return the contents (default).
+                 - "L2-distance": Return a tuple of content and its L2 distance from the query.
+             filter (dict, optional): The filter dict for metadata.
+
+         Returns:
+             Union[list[tuple[str, dict]], list[tuple[str, float, dict]]]:
+                 Depending on `return_score_type`, either a list of (content, metadata)
+                 tuples or a list of (content, score, metadata) tuples.
+         """
+         _filter = {
+             "_id": agent_id,
+         }
+         if filter is not None:
+             _filter.update(filter)
+         if return_score_type == "L2-distance":
+             _result = await self.vectors_store.asimilarity_search_with_score_by_vector(
+                 embedding=embedding,
+                 k=k,
+                 filter=_filter,
+                 fetch_k=fetch_k,
+             )
+             return [(r.page_content, s, r.metadata) for r, s in _result]
+         elif return_score_type == "none":
+             _result = await self.vectors_store.asimilarity_search_by_vector(
+                 embedding=embedding,
+                 k=k,
+                 filter=_filter,
+                 fetch_k=fetch_k,
+             )
+             return [(r.page_content, r.metadata) for r in _result]
+         else:
+             raise ValueError(f"Invalid `return_score_type` {return_score_type}!")
+
+     @lock_decorator
+     async def marginal_relevance_search(
+         self,
+         query: str,
+         agent_id: int,
+         k: int = 4,
+         fetch_k: int = 20,
+         lambda_mult: float = 0.5,
+         return_score_type: Literal["none"] = "none",
+         filter: Optional[dict] = None,
+     ) -> list[tuple[str, dict]]:
+         """
+         Return contents selected using maximal marginal relevance, asynchronously.
+
+         Args:
+             query (str): The text to look up documents similar to.
+             agent_id (int): The identifier of the agent whose documents to search. Only documents associated with this agent are considered.
+             k (int, optional): The number of top similar contents to return. Defaults to 4.
+             fetch_k (int, optional): The number of documents to fetch before applying any filters. Defaults to 20.
+             lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results, with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
+             return_score_type (Literal["none"], optional):
+                 Specifies whether and how to return similarity scores with the results:
+                 - "none": Do not return scores; only return the contents (default).
+             filter (dict, optional): The filter dict for metadata.
+
+         Returns:
+             list[tuple[str, dict]]: the result contents.
+         """
+         _filter = {
+             "_id": agent_id,
+         }
+         if filter is not None:
+             _filter.update(filter)
+
+         if return_score_type == "none":
+             _result = await self.vectors_store.amax_marginal_relevance_search(
+                 query=query,
+                 k=k,
+                 filter=_filter,
+                 fetch_k=fetch_k,
+                 lambda_mult=lambda_mult,
+             )
+             return [(r.page_content, r.metadata) for r in _result]
+         else:
+             raise ValueError(f"Invalid `return_score_type` {return_score_type}!")
+
+     @lock_decorator
+     async def marginal_relevance_search_by_embedding(
+         self,
+         embedding: list[float],
+         agent_id: int,
+         k: int = 4,
+         fetch_k: int = 20,
+         lambda_mult: float = 0.5,
+         return_score_type: Union[Literal["none"], Literal["similarity_score"]] = "none",
+         filter: Optional[dict] = None,
+     ) -> Union[list[tuple[str, dict]], list[tuple[str, float, dict]]]:
+         """
+         Return contents selected using maximal marginal relevance, asynchronously.
+
+         Args:
+             embedding (list[float]): The vector to look up documents similar to.
+             agent_id (int): The identifier of the agent whose documents to search. Only documents associated with this agent are considered.
+             k (int, optional): The number of top similar contents to return. Defaults to 4.
+             fetch_k (int, optional): The number of documents to fetch before applying any filters. Defaults to 20.
+             lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results, with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
+             return_score_type (Union[Literal["none"], Literal["similarity_score"]], optional):
+                 Specifies whether and how to return similarity scores with the results:
+                 - "none": Do not return scores; only return the contents (default).
+                 - "similarity_score": Return a tuple of content and its similarity score.
+             filter (dict, optional): The filter dict for metadata.
+
+         Returns:
+             Union[list[tuple[str, dict]], list[tuple[str, float, dict]]]:
+                 Depending on `return_score_type`, either a list of (content, metadata)
+                 tuples or a list of (content, score, metadata) tuples.
+         """
+         _filter = {
+             "_id": agent_id,
+         }
+         if filter is not None:
+             _filter.update(filter)
+         if return_score_type == "none":
+             _result = await self.vectors_store.amax_marginal_relevance_search_by_vector(
+                 embedding=embedding,
+                 k=k,
+                 filter=_filter,
+                 fetch_k=fetch_k,
+                 lambda_mult=lambda_mult,
+             )
+             return [(r.page_content, r.metadata) for r in _result]
+         elif return_score_type == "similarity_score":
+             _result = await self.vectors_store.amax_marginal_relevance_search_with_score_by_vector(
+                 embedding=embedding,
+                 k=k,
+                 filter=_filter,
+                 fetch_k=fetch_k,
+                 lambda_mult=lambda_mult,
+             )
+             return [(r.page_content, s, r.metadata) for r, s in _result]
+         else:
+             raise ValueError(f"Invalid `return_score_type` {return_score_type}!")