bisheng-langchain 0.2.3__py3-none-any.whl → 0.2.3.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in the public registry. It is provided for informational purposes only.
--- a/bisheng_langchain/agents/llm_functions_agent/base.py
+++ b/bisheng_langchain/agents/llm_functions_agent/base.py
@@ -82,7 +82,7 @@ def _parse_ai_message(message: BaseMessage) -> Union[AgentAction, AgentFinish]:
     if not isinstance(message, AIMessage):
         raise TypeError(f'Expected an AI message got {type(message)}')
 
-    function_call = message.additional_kwargs.get('function_call', {})
+    function_call = message.additional_kwargs.get('tool_calls', {})
 
     if function_call:
         function_name = function_call['name']
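Note that the parser still indexes the result with `function_call['name']`, so whatever sits under the `tool_calls` key must keep the old single-dict `function_call` shape (`name` plus a JSON-string `arguments`), not the list-of-calls layout of newer OpenAI responses. A hedged sketch of a message this parser would accept (the payload is illustrative, not taken from the package):

    from langchain.schema import AIMessage

    # Hypothetical payload matching the dict shape _parse_ai_message reads above.
    message = AIMessage(
        content='',
        additional_kwargs={
            'tool_calls': {
                'name': 'get_weather',
                'arguments': '{"city": "Beijing"}',
            }
        },
    )
    function_call = message.additional_kwargs.get('tool_calls', {})
    assert function_call['name'] == 'get_weather'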
--- a/bisheng_langchain/chat_models/__init__.py
+++ b/bisheng_langchain/chat_models/__init__.py
@@ -1,4 +1,4 @@
-from .host_llm import CustomLLMChat, HostBaichuanChat, HostChatGLM, HostLlama2Chat, HostQwenChat, HostYuanChat, HostYiChat
+from .host_llm import CustomLLMChat, HostBaichuanChat, HostChatGLM, HostLlama2Chat, HostQwenChat, HostQwen1_5Chat, HostYuanChat, HostYiChat
 from .minimax import ChatMinimaxAI
 from .proxy_llm import ProxyChatLLM
 from .qwen import ChatQWen
@@ -10,5 +10,5 @@ from .sensetime import SenseChat
 __all__ = [
     'ProxyChatLLM', 'ChatMinimaxAI', 'ChatWenxin', 'ChatZhipuAI', 'ChatXunfeiAI', 'HostChatGLM',
     'HostBaichuanChat', 'HostLlama2Chat', 'HostQwenChat', 'CustomLLMChat', 'ChatQWen', 'SenseChat',
-    'HostYuanChat', 'HostYiChat'
+    'HostYuanChat', 'HostYiChat', 'HostQwen1_5Chat'
 ]
--- a/bisheng_langchain/chat_models/host_llm.py
+++ b/bisheng_langchain/chat_models/host_llm.py
@@ -341,7 +341,7 @@ class BaseHostChatLLM(BaseChatModel):
                 function_call['arguments'] += _function_call['arguments']
         message = _convert_dict_to_message({
             'content': inner_completion,
-            'role': role,
+            'role': role if role is not None else 'assistant',
            'function_call': function_call,
         })
         return ChatResult(generations=[ChatGeneration(message=message)])
@@ -508,6 +508,19 @@ class HostQwenChat(BaseHostChatLLM):
         return 'qwen_chat'
 
 
+class HostQwen1_5Chat(BaseHostChatLLM):
+    # Qwen1.5-14B-Chat
+    model_name: str = Field('Qwen1.5-14B-Chat', alias='model')
+
+    temperature: float = 0
+    top_p: float = 1
+    max_tokens: int = 4096
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of chat model."""
+        return 'qwen1.5_chat'
+
 class HostLlama2Chat(BaseHostChatLLM):
     # Llama-2-7b-chat-hf, Llama-2-13b-chat-hf, Llama-2-70b-chat-hf
     model_name: str = Field('Llama-2-7b-chat-hf', alias='model')
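A minimal usage sketch for the new class, assuming BaseHostChatLLM exposes the same `host_base_url` field as the other Host* chat models (the endpoint below is a placeholder):

    from bisheng_langchain.chat_models import HostQwen1_5Chat
    from langchain.schema import HumanMessage

    # Hypothetical serving endpoint; point this at your own model host.
    llm = HostQwen1_5Chat(
        model='Qwen1.5-14B-Chat',
        host_base_url='http://127.0.0.1:9000/v2.1/models',
        max_tokens=1024,
    )
    result = llm([HumanMessage(content='Hello')])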
--- a/bisheng_langchain/chat_models/interface/types.py
+++ b/bisheng_langchain/chat_models/interface/types.py
@@ -1,5 +1,4 @@
-# from typing import Union
-
+from typing import Union, List
 from pydantic import BaseModel
 
 
@@ -16,14 +15,14 @@ class Function(BaseModel):
 
 class ChatInput(BaseModel):
     model: str
-    messages: list[Message] = []
+    messages: List[Message] = []
     top_p: float = None
     temperature: float = None
     n: int = 1
     stream: bool = False
     stop: str = None
     max_tokens: int = 256
-    functions: list[Function] = []
+    functions: List[Function] = []
     function_call: str = None
 
 
@@ -46,7 +45,7 @@ class ChatOutput(BaseModel):
     object: str = None
     model: str = None
     created: int = None
-    choices: list[Choice] = []
+    choices: List[Choice] = []
     usage: Usage = None
 
 
--- a/bisheng_langchain/chat_models/qwen.py
+++ b/bisheng_langchain/chat_models/qwen.py
@@ -105,7 +105,7 @@ class ChatQWen(BaseChatModel):
     .. code-block:: python
 
         from bisheng_langchain.chat_models import ChatQWen
-        chat_miniamaxai = ChatQWen(model_name="qwen-turbo")
+        chat_qwen = ChatQWen(model_name="qwen-turbo")
     """
 
     client: Optional[Any]  #: :meta private:
@@ -192,7 +192,11 @@ class ChatQWen(BaseChatModel):
             return self.client.post(url=url, json=inp).json()
 
         rsp_dict = _completion_with_retry(**kwargs)
-        if 'output' not in rsp_dict:
+        if 'code' in rsp_dict and rsp_dict['code'] == 'DataInspectionFailed':
+            output_res = {'choices': [{'finish_reason': 'stop', 'message': {'role': 'assistant', 'content': rsp_dict['message']}}]}
+            usage_res = {'total_tokens': 2, 'output_tokens': 1, 'input_tokens': 1}
+            return output_res, usage_res
+        elif 'output' not in rsp_dict:
             logger.error(f'proxy_llm_error resp={rsp_dict}')
             message = rsp_dict['message']
             raise Exception(message)
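The new branch turns DashScope's content-moderation rejection (code `DataInspectionFailed`) into an ordinary completion that ends with `finish_reason='stop'`, instead of raising like other error responses. A standalone sketch of that guard, with a hypothetical response dict:

    # Hypothetical moderation-failure response, for illustration only.
    rsp_dict = {'code': 'DataInspectionFailed',
                'message': 'Input data may contain inappropriate content.'}

    if rsp_dict.get('code') == 'DataInspectionFailed':
        # Surface the moderation message as a normal assistant reply.
        output_res = {'choices': [{'finish_reason': 'stop',
                                   'message': {'role': 'assistant',
                                               'content': rsp_dict['message']}}]}
        usage_res = {'total_tokens': 2, 'output_tokens': 1, 'input_tokens': 1}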
--- a/bisheng_langchain/chat_models/zhipuai.py
+++ b/bisheng_langchain/chat_models/zhipuai.py
@@ -226,8 +226,15 @@ class ChatZhipuAI(BaseChatModel):
     ) -> ChatResult:
         message_dicts, params = self._create_message_dicts(messages, stop)
         params = {**params, **kwargs}
-
         response = self.completion_with_retry(messages=message_dicts, **params)
+        if response['code'] == 1301:
+            response = {'code': 200,
+                        'msg': '操作成功',
+                        'data': {'request_id': '0', 'task_id': '0', 'task_status': 'SUCCESS',
+                                 'choices': [{'role': 'assistant', 'content': '""'}],
+                                 'usage': {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0}},
+                        'success': True}
+
         return self._create_chat_result(response)
 
     async def _agenerate(
--- a/bisheng_langchain/embeddings/__init__.py
+++ b/bisheng_langchain/embeddings/__init__.py
@@ -1,8 +1,11 @@
 from .host_embedding import (BGEZhEmbedding, CustomHostEmbedding, GTEEmbedding, HostEmbeddings,
-                             ME5Embedding)
+                             ME5Embedding, JINAEmbedding)
 from .wenxin import WenxinEmbeddings
+from .huggingfacemultilingual import HuggingFaceMultilingualEmbeddings
+from .huggingfacegte import HuggingFaceGteEmbeddings
 
 __all__ = [
     'WenxinEmbeddings', 'ME5Embedding', 'BGEZhEmbedding', 'GTEEmbedding',
-    'HostEmbeddings', 'CustomHostEmbedding'
+    'HostEmbeddings', 'CustomHostEmbedding', 'JINAEmbedding',
+    'HuggingFaceMultilingualEmbeddings', 'HuggingFaceGteEmbeddings'
 ]
--- a/bisheng_langchain/embeddings/host_embedding.py
+++ b/bisheng_langchain/embeddings/host_embedding.py
@@ -158,6 +158,10 @@ class GTEEmbedding(HostEmbeddings):
     embedding_ctx_length: int = 512
 
 
+class JINAEmbedding(HostEmbeddings):
+    model: str = 'jina'
+    embedding_ctx_length: int = 512
+
 class CustomHostEmbedding(HostEmbeddings):
     model: str = Field('custom-embedding', alias='model')
     embedding_ctx_length: int = 512
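A brief usage sketch for the new hosted Jina model, assuming HostEmbeddings exposes a `host_base_url` field like the other hosted embedding classes (the endpoint is a placeholder):

    from bisheng_langchain.embeddings import JINAEmbedding

    # Hypothetical serving endpoint.
    emb = JINAEmbedding(host_base_url='http://127.0.0.1:9001/v2.1/models')
    doc_vecs = emb.embed_documents(['first document', 'second document'])
    query_vec = emb.embed_query('a query')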
--- /dev/null
+++ b/bisheng_langchain/embeddings/huggingfacegte.py
@@ -0,0 +1,89 @@
+from typing import Any, Dict, List, Optional
+
+import requests
+from langchain_core.embeddings import Embeddings
+from langchain_core.pydantic_v1 import BaseModel, Extra, Field
+DEFAULT_Multilingual_MODEL = "thenlper/gte-large-zh"
+
+
+class HuggingFaceGteEmbeddings(BaseModel, Embeddings):
+    """HuggingFace GTE sentence_transformers embedding models.
+
+    To use, you should have the ``sentence_transformers`` python package installed.
+
+    Example:
+        .. code-block:: python
+
+            from bisheng_langchain.embeddings import HuggingFaceGteEmbeddings
+
+            model_name = "thenlper/gte-large-zh"
+            model_kwargs = {'device': 'cpu'}
+            encode_kwargs = {'normalize_embeddings': True}
+            hf = HuggingFaceGteEmbeddings(
+                model_name=model_name,
+                model_kwargs=model_kwargs,
+                encode_kwargs=encode_kwargs
+            )
+    """
+
+    client: Any  #: :meta private:
+    model_name: str = DEFAULT_Multilingual_MODEL
+    """Model name to use."""
+    cache_folder: Optional[str] = None
+    """Path to store models.
+    Can also be set by the SENTENCE_TRANSFORMERS_HOME environment variable."""
+    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
+    """Keyword arguments to pass to the model."""
+    encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
+    """Keyword arguments to pass when calling the `encode` method of the model."""
+    # query_instruction: str = DEFAULT_QUERY_BGE_INSTRUCTION_EN
+    """Instruction to use for embedding query."""
+
+    def __init__(self, **kwargs: Any):
+        """Initialize the sentence_transformer."""
+        super().__init__(**kwargs)
+        try:
+            import sentence_transformers
+
+        except ImportError as exc:
+            raise ImportError(
+                "Could not import sentence_transformers python package. "
+                "Please install it with `pip install sentence_transformers`."
+            ) from exc
+
+        self.client = sentence_transformers.SentenceTransformer(
+            self.model_name, cache_folder=self.cache_folder, **self.model_kwargs
+        )
+        # if "-zh" in self.model_name:
+        #     self.query_instruction = DEFAULT_QUERY_BGE_INSTRUCTION_ZH
+
+    # class Config:
+    #     """Configuration for this pydantic object."""
+
+    #     extra = Extra.forbid
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Compute doc embeddings using a HuggingFace transformer model.
+
+        Args:
+            texts: The list of texts to embed.
+
+        Returns:
+            List of embeddings, one for each text.
+        """
+        texts = [t.replace("\n", " ") for t in texts]
+        embeddings = self.client.encode(texts, **self.encode_kwargs)
+        return embeddings.tolist()
+
+    def embed_query(self, text: str) -> List[float]:
+        """Compute query embeddings using a HuggingFace transformer model.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            Embeddings for the text.
+        """
+        text = text.replace("\n", " ")
+        embedding = self.client.encode(text, **self.encode_kwargs)
+        return embedding.tolist()
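Because the wrapper returns plain Python lists and GTE vectors are typically compared with cosine similarity, a quick end-to-end sanity check stays dependency-light. A hedged sketch (the model downloads via sentence_transformers on first construction):

    import numpy as np

    from bisheng_langchain.embeddings import HuggingFaceGteEmbeddings

    gte = HuggingFaceGteEmbeddings(encode_kwargs={'normalize_embeddings': True})
    docs = ['Bisheng is an LLM application platform.', 'GTE is a text embedding model.']
    doc_vecs = np.array(gte.embed_documents(docs))
    query_vec = np.array(gte.embed_query('What is Bisheng?'))
    # With normalized embeddings, the dot product equals cosine similarity.
    scores = doc_vecs @ query_vec
    print(docs[int(scores.argmax())])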
--- /dev/null
+++ b/bisheng_langchain/embeddings/huggingfacemultilingual.py
@@ -0,0 +1,96 @@
+from typing import Any, Dict, List, Optional
+
+import requests
+from langchain_core.embeddings import Embeddings
+from langchain_core.pydantic_v1 import BaseModel, Extra, Field
+DEFAULT_Multilingual_MODEL = "intfloat/multilingual-e5-large"
+
+
+class HuggingFaceMultilingualEmbeddings(BaseModel, Embeddings):
+    """HuggingFace multilingual-E5 sentence_transformers embedding models.
+
+    To use, you should have the ``sentence_transformers`` python package installed.
+
+    Example:
+        .. code-block:: python
+
+            from bisheng_langchain.embeddings import HuggingFaceMultilingualEmbeddings
+
+            model_name = "intfloat/multilingual-e5-large"
+            model_kwargs = {'device': 'cpu'}
+            encode_kwargs = {'normalize_embeddings': True}
+            hf = HuggingFaceMultilingualEmbeddings(
+                model_name=model_name,
+                model_kwargs=model_kwargs,
+                encode_kwargs=encode_kwargs
+            )
+    """
+
+    client: Any  #: :meta private:
+    model_name: str = DEFAULT_Multilingual_MODEL
+    """Model name to use."""
+    cache_folder: Optional[str] = None
+    """Path to store models.
+    Can also be set by the SENTENCE_TRANSFORMERS_HOME environment variable."""
+    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
+    """Keyword arguments to pass to the model."""
+    encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
+    """Keyword arguments to pass when calling the `encode` method of the model."""
+    # query_instruction: str = DEFAULT_QUERY_BGE_INSTRUCTION_EN
+    """Instruction to use for embedding query."""
+
+    def __init__(self, **kwargs: Any):
+        """Initialize the sentence_transformer."""
+        super().__init__(**kwargs)
+        try:
+            import sentence_transformers
+
+        except ImportError as exc:
+            raise ImportError(
+                "Could not import sentence_transformers python package. "
+                "Please install it with `pip install sentence_transformers`."
+            ) from exc
+
+        self.client = sentence_transformers.SentenceTransformer(
+            self.model_name, cache_folder=self.cache_folder, **self.model_kwargs
+        )
+        # if "-zh" in self.model_name:
+        #     self.query_instruction = DEFAULT_QUERY_BGE_INSTRUCTION_ZH
+
+    # class Config:
+    #     """Configuration for this pydantic object."""
+
+    #     extra = Extra.forbid
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Compute doc embeddings using a HuggingFace transformer model.
+
+        Args:
+            texts: The list of texts to embed.
+
+        Returns:
+            List of embeddings, one for each text.
+        """
+        texts = [t.replace("\n", " ") for t in texts]
+        # E5-style models expect a "passage: " prefix on documents.
+        textschange = []
+        for t in texts:
+            textschange.append("passage: " + t)
+
+        embeddings = self.client.encode(textschange, **self.encode_kwargs)
+        return embeddings.tolist()
+
+    def embed_query(self, text: str) -> List[float]:
+        """Compute query embeddings using a HuggingFace transformer model.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            Embeddings for the text.
+        """
+        text = text.replace("\n", " ")
+        # E5-style models expect a "query: " prefix on queries.
+        text = "query: " + text
+        embedding = self.client.encode(text, **self.encode_kwargs)
+        return embedding.tolist()
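The asymmetric `passage: ` / `query: ` prefixes follow the E5 training recipe, so documents and queries are deliberately embedded differently; callers never see the prefixes. A minimal sketch:

    from bisheng_langchain.embeddings import HuggingFaceMultilingualEmbeddings

    e5 = HuggingFaceMultilingualEmbeddings(encode_kwargs={'normalize_embeddings': True})
    # Internally encoded as "passage: ..." and "query: ..." per the E5 convention.
    doc_vecs = e5.embed_documents(['Bisheng is an LLM devops platform.'])
    query_vec = e5.embed_query('What is Bisheng?')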
--- a/bisheng_langchain/retrievers/__init__.py
+++ b/bisheng_langchain/retrievers/__init__.py
@@ -1,5 +1,7 @@
 from bisheng_langchain.retrievers.mix_es_vector import MixEsVectorRetriever
+from bisheng_langchain.retrievers.ensemble import EnsembleRetriever
 
 __all__ = [
-    "MixEsVectorRetriever"
+    "MixEsVectorRetriever",
+    "EnsembleRetriever"
 ]
--- /dev/null
+++ b/bisheng_langchain/retrievers/ensemble.py
@@ -0,0 +1,188 @@
+"""
+Ensemble retriever that ensembles the results of
+multiple retrievers by using weighted Reciprocal Rank Fusion
+"""
+
+from typing import Any, Dict, List
+
+from langchain_core.documents import Document
+from langchain_core.pydantic_v1 import root_validator
+from langchain_core.retrievers import BaseRetriever
+
+from langchain.callbacks.manager import (
+    AsyncCallbackManagerForRetrieverRun,
+    CallbackManagerForRetrieverRun,
+)
+
+
+class EnsembleRetriever(BaseRetriever):
+    """Retriever that ensembles multiple retrievers.
+
+    It uses weighted Reciprocal Rank Fusion.
+
+    Args:
+        retrievers: A list of retrievers to ensemble.
+        weights: A list of weights corresponding to the retrievers. Defaults to equal
+            weighting for all retrievers.
+        c: A constant added to the rank, controlling the balance between the importance
+            of high-ranked items and the consideration given to lower-ranked items.
+            Default is 60.
+    """
+
+    retrievers: List[BaseRetriever]
+    weights: List[float]
+    c: int = 60
+
+    @root_validator(pre=True)
+    def set_weights(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+        if not values.get("weights"):
+            n_retrievers = len(values["retrievers"])
+            values["weights"] = [1 / n_retrievers] * n_retrievers
+        return values
+
+    def _get_relevant_documents(
+        self,
+        query: str,
+        *,
+        run_manager: CallbackManagerForRetrieverRun,
+        **kwargs: Any,
+    ) -> List[Document]:
+        """
+        Get the relevant documents for a given query.
+
+        Args:
+            query: The query to search for.
+
+        Returns:
+            A list of reranked documents.
+        """
+
+        # Get fused result of the retrievers.
+        fused_documents = self.rank_fusion(query, run_manager, **kwargs)
+
+        return fused_documents
+
+    async def _aget_relevant_documents(
+        self,
+        query: str,
+        *,
+        run_manager: AsyncCallbackManagerForRetrieverRun,
+    ) -> List[Document]:
+        """
+        Asynchronously get the relevant documents for a given query.
+
+        Args:
+            query: The query to search for.
+
+        Returns:
+            A list of reranked documents.
+        """
+
+        # Get fused result of the retrievers.
+        fused_documents = await self.arank_fusion(query, run_manager)
+
+        return fused_documents
+
+    def rank_fusion(
+        self,
+        query: str,
+        run_manager: CallbackManagerForRetrieverRun,
+        **kwargs: Any,
+    ) -> List[Document]:
+        """
+        Retrieve the results of the retrievers and use rank_fusion_func to get
+        the final result.
+
+        Args:
+            query: The query to search for.
+
+        Returns:
+            A list of reranked documents.
+        """
+
+        # Get the results of all retrievers.
+        retriever_docs = [
+            retriever.get_relevant_documents(
+                query,
+                callbacks=run_manager.get_child(tag=f"retriever_{i+1}"),
+                **kwargs,
+            )
+            for i, retriever in enumerate(self.retrievers)
+        ]
+
+        # Apply rank fusion.
+        fused_documents = self.weighted_reciprocal_rank(retriever_docs)
+
+        return fused_documents
+
+    async def arank_fusion(
+        self,
+        query: str,
+        run_manager: AsyncCallbackManagerForRetrieverRun,
+        **kwargs: Any,
+    ) -> List[Document]:
+        """
+        Asynchronously retrieve the results of the retrievers
+        and use rank_fusion_func to get the final result.
+
+        Args:
+            query: The query to search for.
+
+        Returns:
+            A list of reranked documents.
+        """
+
+        # Get the results of all retrievers.
+        retriever_docs = [
+            await retriever.aget_relevant_documents(
+                query,
+                callbacks=run_manager.get_child(tag=f"retriever_{i+1}"),
+                **kwargs,
+            )
+            for i, retriever in enumerate(self.retrievers)
+        ]
+
+        # Apply rank fusion.
+        fused_documents = self.weighted_reciprocal_rank(retriever_docs)
+
+        return fused_documents
+
+    def weighted_reciprocal_rank(self, doc_lists: List[List[Document]]) -> List[Document]:
+        """
+        Perform weighted Reciprocal Rank Fusion on multiple rank lists.
+        You can find more details about RRF here:
+        https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf
+
+        Args:
+            doc_lists: A list of rank lists, where each rank list contains unique items.
+
+        Returns:
+            list: The final aggregated list of items sorted by their weighted RRF
+                scores in descending order.
+        """
+        if len(doc_lists) != len(self.weights):
+            raise ValueError("Number of rank lists must be equal to the number of weights.")
+
+        # Create a union of all unique documents in the input doc_lists
+        all_documents = set()
+        for doc_list in doc_lists:
+            for doc in doc_list:
+                all_documents.add(doc.page_content)
+
+        # Initialize the RRF score dictionary for each document
+        rrf_score_dic = {doc: 0.0 for doc in all_documents}
+
+        # Calculate RRF scores for each document
+        for doc_list, weight in zip(doc_lists, self.weights):
+            for rank, doc in enumerate(doc_list, start=1):
+                rrf_score = weight * (1 / (rank + self.c))
+                rrf_score_dic[doc.page_content] += rrf_score
+
+        # Sort documents by their RRF scores in descending order
+        sorted_documents = sorted(rrf_score_dic.keys(), key=lambda x: rrf_score_dic[x], reverse=True)
+
+        # Map the sorted page_content back to the original document objects
+        page_content_to_doc_map = {doc.page_content: doc for doc_list in doc_lists for doc in doc_list}
+        sorted_docs = [page_content_to_doc_map[page_content] for page_content in sorted_documents]
+
+        return sorted_docs
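For reference, the fused score computed above is score(d) = sum_i w_i / (rank_i(d) + c), taken over the retrievers that returned d. A small worked example plus a hedged usage sketch (the two retrievers are placeholders for any BaseRetriever instances, e.g. keyword and vector):

    # Weighted RRF arithmetic with two retrievers, w = [0.5, 0.5], c = 60.
    # Doc X is ranked 1st by retriever A and 3rd by retriever B:
    score_x = 0.5 / (1 + 60) + 0.5 / (3 + 60)   # ~0.0161
    # A doc ranked 2nd by only one retriever gets 0.5 / 62 ~ 0.0081,
    # so documents that rank high in several lists win the fusion.

    from bisheng_langchain.retrievers import EnsembleRetriever

    ensemble = EnsembleRetriever(retrievers=[keyword_retriever, vector_retriever],
                                 weights=[0.4, 0.6])
    docs = ensemble.get_relevant_documents('what is bisheng?')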
--- a/bisheng_langchain/vectorstores/elastic_keywords_search.py
+++ b/bisheng_langchain/vectorstores/elastic_keywords_search.py
@@ -120,11 +120,19 @@ class ElasticKeywordsSearch(VectorStore, ABC):
         self.llm_chain = llm_chain
         self.drop_old = drop_old
         _ssl_verify = ssl_verify or {}
+        self.elasticsearch_url = elasticsearch_url
+        self.ssl_verify = _ssl_verify
         try:
             self.client = elasticsearch.Elasticsearch(elasticsearch_url, **_ssl_verify)
         except ValueError as e:
             raise ValueError(f'Your elasticsearch client string is mis-formatted. Got error: {e} ')
 
+        if drop_old:
+            try:
+                self.client.indices.delete(index=index_name)
+            except elasticsearch.exceptions.NotFoundError:
+                pass
+
     def add_texts(
         self,
         texts: Iterable[str],
@@ -250,6 +258,7 @@ class ElasticKeywordsSearch(VectorStore, ABC):
         refresh_indices: bool = True,
         llm: Optional[BaseLLM] = None,
         prompt: Optional[PromptTemplate] = DEFAULT_PROMPT,
+        drop_old: Optional[bool] = False,
         **kwargs: Any,
     ) -> ElasticKeywordsSearch:
         """Construct ElasticKeywordsSearch wrapper from raw documents.
@@ -279,13 +288,11 @@ class ElasticKeywordsSearch(VectorStore, ABC):
         index_name = index_name or uuid.uuid4().hex
         if llm:
             llm_chain = LLMChain(llm=llm, prompt=prompt)
-            vectorsearch = cls(elasticsearch_url, index_name, llm_chain=llm_chain, **kwargs)
+            vectorsearch = cls(elasticsearch_url, index_name, llm_chain=llm_chain, drop_old=drop_old, **kwargs)
         else:
-            vectorsearch = cls(elasticsearch_url, index_name, **kwargs)
-        vectorsearch.add_texts(texts,
-                               metadatas=metadatas,
-                               ids=ids,
-                               refresh_indices=refresh_indices)
+            vectorsearch = cls(elasticsearch_url, index_name, drop_old=drop_old, **kwargs)
+        vectorsearch.add_texts(texts, metadatas=metadatas, ids=ids, refresh_indices=refresh_indices)
+
         return vectorsearch
 
     def create_index(self, client: Any, index_name: str, mapping: Dict) -> None:
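A hedged sketch of the new drop_old flag; the positional arguments mirror the cls(...) calls above, and the URL and index name are placeholders:

    from bisheng_langchain.vectorstores import ElasticKeywordsSearch

    # drop_old=True deletes any existing index of the same name before writing.
    store = ElasticKeywordsSearch('http://127.0.0.1:9200', 'my_keyword_index', drop_old=True)
    store.add_texts(['doc one', 'doc two'])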
--- a/bisheng_langchain/vectorstores/milvus.py
+++ b/bisheng_langchain/vectorstores/milvus.py
@@ -8,8 +8,8 @@ from uuid import uuid4
 import numpy as np
 from langchain.docstore.document import Document
 from langchain.embeddings.base import Embeddings
-from langchain.vectorstores.milvus import Milvus as MilvusLangchain
 from langchain.vectorstores.utils import maximal_marginal_relevance
+from langchain_community.vectorstores.milvus import Milvus as MilvusLangchain
 
 logger = logging.getLogger(__name__)
 
@@ -185,6 +185,7 @@ class Milvus(MilvusLangchain):
         self.index_params = index_params
         self.search_params = search_params
         self.consistency_level = consistency_level
+        self.connection_args = connection_args
 
         # In order for a collection to be compatible, pk needs to be auto'id and int
         self._primary_field = primary_field
@@ -199,15 +200,23 @@ class Milvus(MilvusLangchain):
         # Create the connection to the server
         if connection_args is None:
             connection_args = DEFAULT_MILVUS_CONNECTION
+        # if 'timeout' not in connection_args:
+        #     connection_args['timeout'] = 30
+
         self.alias = self._create_connection_alias(connection_args)
         self.col: Optional[Collection] = None
 
         # Grab the existing collection if it exists
-        if utility.has_collection(self.collection_name, using=self.alias):
-            self.col = Collection(
-                self.collection_name,
-                using=self.alias,
-            )
+        try:
+            if utility.has_collection(self.collection_name, using=self.alias):
+                self.col = Collection(
+                    self.collection_name,
+                    using=self.alias,
+                )
+        except Exception as e:
+            logger.error(f'milvus operating error={str(e)}')
+            self.close_connection(self.alias)
+            raise e
         # If need to drop old, drop it
         if drop_old and isinstance(self.col, Collection):
             self.col.drop()
@@ -216,6 +225,10 @@ class Milvus(MilvusLangchain):
         # Initialize the vector store
         self._init()
 
+    def close_connection(self, using):
+        from pymilvus import connections
+        connections.remove_connection(using)
+
     def _create_connection_alias(self, connection_args: dict) -> str:
         """Create the connection to the Milvus server."""
         from pymilvus import MilvusException, connections
@@ -430,6 +443,7 @@ class Milvus(MilvusLangchain):
         metadatas: Optional[List[dict]] = None,
         timeout: Optional[int] = None,
         batch_size: int = 1000,
+        no_embedding: bool = False,
         **kwargs: Any,
     ) -> List[str]:
         """Insert text data into Milvus.
@@ -460,15 +474,20 @@ class Milvus(MilvusLangchain):
         from pymilvus import Collection, MilvusException
 
         texts = list(texts)
+        if not no_embedding:
+            try:
+                embeddings = self.embedding_func.embed_documents(texts)
+            except NotImplementedError:
+                embeddings = [self.embedding_func.embed_query(x) for x in texts]
 
-        try:
-            embeddings = self.embedding_func.embed_documents(texts)
-        except NotImplementedError:
-            embeddings = [self.embedding_func.embed_query(x) for x in texts]
-
-        if len(embeddings) == 0:
-            logger.debug('Nothing to insert, skipping.')
-            return []
+            if len(embeddings) == 0:
+                logger.debug('Nothing to insert, skipping.')
+                return []
+        else:
+            embeddings = [[0.0]] * len(texts)
+            if len(embeddings) == 0:
+                logger.debug('Nothing to insert, skipping.')
+                return []
 
         # If the collection hasn't been initialized yet, perform all steps to do so
         if not isinstance(self.col, Collection):
@@ -842,6 +861,7 @@ class Milvus(MilvusLangchain):
         index_params: Optional[dict] = None,
         search_params: Optional[dict] = None,
         drop_old: bool = False,
+        no_embedding: bool = False,
         **kwargs: Any,
     ) -> Milvus:
         """Create a Milvus collection, indexes it with HNSW, and insert data.
@@ -877,7 +897,7 @@ class Milvus(MilvusLangchain):
             drop_old=drop_old,
             **kwargs,
         )
-        vector_db.add_texts(texts=texts, metadatas=metadatas)
+        vector_db.add_texts(texts=texts, metadatas=metadatas, no_embedding=no_embedding)
         return vector_db
 
     @staticmethod
883
903
  @staticmethod
@@ -888,3 +908,22 @@ class Milvus(MilvusLangchain):
888
908
 
889
909
  def _select_relevance_score_fn(self) -> Callable[[float], float]:
890
910
  return self._relevance_score_fn
911
+
912
+ def query(self, expr: str, timeout: Optional[int] = None, **kwargs: Any) -> List[Document]:
913
+ output_fields = self.fields[:]
914
+ output_fields.remove(self._vector_field)
915
+ res = self.col.query(
916
+ expr=expr,
917
+ output_fields=output_fields,
918
+ timeout=timeout,
919
+ limit=1,
920
+ **kwargs,
921
+ )
922
+ # Organize results.
923
+ ret = []
924
+ for result in res:
925
+ meta = {x: result.get(x) for x in output_fields}
926
+ doc = Document(page_content=meta.pop(self._text_field), metadata=meta)
927
+ ret.append(doc)
928
+
929
+ return ret
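The new query method is a scalar (non-vector) lookup over the collection's metadata fields; note the hard-coded limit=1, so it returns at most one matching Document. A sketch, assuming `store` is a Milvus instance and texts were stored with a hypothetical file_id metadata field:

    # `expr` uses Milvus boolean expression syntax; the field name is illustrative.
    docs = store.query(expr='file_id == 123', timeout=10)
    if docs:
        print(docs[0].page_content)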
--- a/bisheng_langchain-0.2.3.dist-info/METADATA
+++ b/bisheng_langchain-0.2.3.2.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: bisheng-langchain
-Version: 0.2.3
+Version: 0.2.3.2
 Summary: bisheng langchain modules
 Home-page: https://github.com/dataelement/bisheng
 Author: DataElem
--- a/bisheng_langchain-0.2.3.dist-info/RECORD
+++ b/bisheng_langchain-0.2.3.2.dist-info/RECORD
@@ -6,7 +6,7 @@ bisheng_langchain/agents/chatglm_functions_agent/base.py,sha256=tyytq0XIFXpfxDP0
 bisheng_langchain/agents/chatglm_functions_agent/output_parser.py,sha256=M7vDzQFqFUMmL250FHeNKXMwatkCdD0x1D0hyqGYRAA,3497
 bisheng_langchain/agents/chatglm_functions_agent/prompt.py,sha256=OiBTRUOhvhSyO2jO2ByUUiaCrkK_tIUH9pMWWKs-aF4,992
 bisheng_langchain/agents/llm_functions_agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-bisheng_langchain/agents/llm_functions_agent/base.py,sha256=_bJNSYZJrY82Tcc0zwQLO4qyuTZbOkVDBA0hndGM12w,12311
+bisheng_langchain/agents/llm_functions_agent/base.py,sha256=4mzDOGheLGcP55xrGcYkLiH4kIII2IJjtYLAJAu41es,12308
 bisheng_langchain/autogen_role/__init__.py,sha256=MnTGbAOK770JM9l95Qcxu93s2gNAmhlil7K9HdFG81o,430
 bisheng_langchain/autogen_role/assistant.py,sha256=VGCoxJaRxRG6ZIJa2TsxcLZbMbF4KC8PRB76DOuznNU,4736
 bisheng_langchain/autogen_role/custom.py,sha256=8xxtAzNF_N1fysyChynVD19t659Qvtcyj_LNiOrE7ew,2499
@@ -26,19 +26,19 @@ bisheng_langchain/chains/retrieval/retrieval_chain.py,sha256=7VLJ-IPVjKfmAVgVET4
 bisheng_langchain/chains/router/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/chains/router/multi_rule.py,sha256=BiFryj3-7rOxfttD-MyOkKWLCSGB9LVYd2rjOsIfQC8,375
 bisheng_langchain/chains/router/rule_router.py,sha256=R2YRUnwn7s_7DbsSn27uPn4cIV0D-5iXEORXir0tNGM,1835
-bisheng_langchain/chat_models/__init__.py,sha256=7NuGJAUgeCF9yDqe7D3Yw69_5COlsExg811TSDErpps,599
-bisheng_langchain/chat_models/host_llm.py,sha256=zbrWUf9Vvc9_8dlVLPoY7Cm0NL7WhE9DSd5F7xYhY2A,22420
+bisheng_langchain/chat_models/__init__.py,sha256=4-HTLE_SXO4hmNJu6yQxiQKBt2IFca_ezllVBLmvbEE,635
+bisheng_langchain/chat_models/host_llm.py,sha256=HY2HtMwiW-0TeyVlDZ85Vr0ldRhr3M7ICMdrWqll1OM,22774
 bisheng_langchain/chat_models/minimax.py,sha256=JLs_f6vWD9beZYUtjD4FG28G8tZHrGUAWOwdLIuJomw,13901
 bisheng_langchain/chat_models/proxy_llm.py,sha256=wzVBZik9WC3-f7kyQ1eu3Ooibqpcocln08knf5lV1Nw,17082
-bisheng_langchain/chat_models/qwen.py,sha256=jGx_tW-LPxfegE6NvY6wID8ps2SsP813atjXnc04C-s,18841
+bisheng_langchain/chat_models/qwen.py,sha256=3_ncSsTJLaHH4FpWnfhU1ZJt0YlyhK4Utg_HSxepSiM,19172
 bisheng_langchain/chat_models/sensetime.py,sha256=fuQ5yYGO5F7o7iQ7us17MlL4TAWRRFCCpNN9bAF-ydc,17056
 bisheng_langchain/chat_models/wenxin.py,sha256=OBXmFWkUWZMu1lUz6hPAEawsbAcdgMWcm9WkJJLZyng,13671
 bisheng_langchain/chat_models/xunfeiai.py,sha256=Yz09-I8u6XhGVnT5mdel15Z3CCQZqApJkgnaxyiZNFk,14037
-bisheng_langchain/chat_models/zhipuai.py,sha256=KokWmDDwljsV2iFiRXZlylIaQRw4jDOq5aCnat53wnQ,14887
+bisheng_langchain/chat_models/zhipuai.py,sha256=MgN8pFInUB6q5agZSnAOipYxTIxAAGhh-Zq6NXs9Hxc,15342
 bisheng_langchain/chat_models/interface/__init__.py,sha256=KwcZMPSxFiXu6joXoZEgq6THxZeDXA8neZcOuLKBpUk,443
 bisheng_langchain/chat_models/interface/minimax.py,sha256=tF3S7ryFtYVXwh7jHHH9z1eY8nMCy0iLiFocsPSJ3pA,4423
 bisheng_langchain/chat_models/interface/openai.py,sha256=v4kxxglJoVMJ9kxaRDIJnWHBSvjl9vRhzQb5Fr-keg0,2081
-bisheng_langchain/chat_models/interface/types.py,sha256=SmbbDCI7hdWmxqRVEprW0NMPa4-F914Y8vYUhdWZ7Aw,1138
+bisheng_langchain/chat_models/interface/types.py,sha256=FZwQJPDnStQ3oJx5ubyGJlvrhnGCgqhhnZDYSLmqFOs,1141
 bisheng_langchain/chat_models/interface/utils.py,sha256=qww_uYsWDqK7cLuv-KzZmmlg9SZAHOi4R_6I6S4XLIk,65
 bisheng_langchain/chat_models/interface/wenxin.py,sha256=z_K1Nj78dDYYgiVIzc5sGkOiGr8OAoRwaKwmpWXssH0,4246
 bisheng_langchain/chat_models/interface/xunfei.py,sha256=DPHAZM_uHg0A8GnebgkRbLENhBW7bBtRHzKC0gFKZgc,7514
@@ -55,8 +55,10 @@ bisheng_langchain/document_loaders/parsers/ellm_client.py,sha256=B4Dea8xXXnGvB9j
 bisheng_langchain/document_loaders/parsers/image.py,sha256=7Vx4dD_WiSTojS4TMIJFxfE8nvze0kwNnwTd6f1cLds,938
 bisheng_langchain/document_loaders/parsers/ocr_client.py,sha256=rRh1coJYn24n7FaINBZH5yO6Edm9TRywY6UOXpcerVo,1612
 bisheng_langchain/document_loaders/parsers/test_image.py,sha256=EJHozq5oFfLBlLL5Lr6XFkrkvSttPpohprs9OjDzAKM,8685
-bisheng_langchain/embeddings/__init__.py,sha256=jaQ4F8e3yOvbwBhX-JJDmMHZrcDdIdtZLx752jbnb_E,309
-bisheng_langchain/embeddings/host_embedding.py,sha256=ZIHynOctSSSx2pPpEMw13vgPswJpUBMFsdO4ze0665o,6223
+bisheng_langchain/embeddings/__init__.py,sha256=_zLLb9cH4Ct4UpKQhtXr7V2IQ7LUnlCKkKTroTE_Enk,534
+bisheng_langchain/embeddings/host_embedding.py,sha256=CK_hZgOd3VJrkyh4Zyb3SrpkxlRRfy7ffanWfhkjIcE,6321
+bisheng_langchain/embeddings/huggingfacegte.py,sha256=RPfSXu7oMv6vgIjLqrPZ1Qz3K0yEuYn7VO0u7m7PzK8,3192
+bisheng_langchain/embeddings/huggingfacemultilingual.py,sha256=g7-yKJ-qIPUZQaRnGz312S-f3aJCGcdHemAR3znE-uo,3415
 bisheng_langchain/embeddings/wenxin.py,sha256=6zx53tSUguvny4gGe5CTmfwV-QtGqKmcT-Jlgf2xVUs,4737
 bisheng_langchain/embeddings/interface/__init__.py,sha256=GNY3tibpRxpAdAfSvQmXBKo0xKSLke_9y4clofi_WOE,98
 bisheng_langchain/embeddings/interface/types.py,sha256=VdurbtsnjCPdlOjPFcK2Mg6r9bJYYHb3tepvkk-y3nM,461
@@ -64,15 +66,16 @@ bisheng_langchain/embeddings/interface/wenxin.py,sha256=5d9gI4enmfkD80s0FHKiDt33
 bisheng_langchain/input_output/__init__.py,sha256=sW_GB7MlrHYsqY1Meb_LeimQqNsMz1gH-00Tqb2BUyM,153
 bisheng_langchain/input_output/input.py,sha256=I5YDmgbvvj1o2lO9wi8LE37wM0wP5jkhUREU32YrZMQ,1094
 bisheng_langchain/input_output/output.py,sha256=6U-az6-Cwz665C2YmcH3SYctWVjPFjmW8s70CA_qphk,11585
-bisheng_langchain/retrievers/__init__.py,sha256=TcyK31IMgFJcYaOCLd9O6qFzXt1VMbtLs-g4C6ml_3w,117
+bisheng_langchain/retrievers/__init__.py,sha256=XqBeNyPyNCJf-SzNBiFlkxtjrtHUFTTi5pe2yPyOKrA,210
+bisheng_langchain/retrievers/ensemble.py,sha256=nLsTKpJmaigrECCWzrvDUwhE-qs9Mg7gPRXfPo5qFMI,5942
 bisheng_langchain/retrievers/mix_es_vector.py,sha256=dSrrsuMPSgGiu181EOzACyIKiDXR0qNBQz_914USD3E,4465
 bisheng_langchain/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/utils/requests.py,sha256=20ooDlMDMkXig--rSyRqbnAlbGLscBvvkHzFk2AmyGM,8517
 bisheng_langchain/vectorstores/__init__.py,sha256=zCZgDe7LyQ0iDkfcm5UJ5NxwKQSRHnqrsjx700Fy11M,213
-bisheng_langchain/vectorstores/elastic_keywords_search.py,sha256=gt_uw_fSMcEZWxbiA3V0RyA-utLOZlUY-qxdwnsfZks,12664
-bisheng_langchain/vectorstores/milvus.py,sha256=44ZbDsIxdsbUnHOpEpCdrW5zvWnYvDdAVoDKjCFoyYI,34424
+bisheng_langchain/vectorstores/elastic_keywords_search.py,sha256=ACUzgeTwzVOVrm0EqBXF_VhzwrWZJbKYQgqNSW5VhbQ,12929
+bisheng_langchain/vectorstores/milvus.py,sha256=-gXIQzzmoPggLS2KEGUtp6kHg9peoawBlSVuOwQndHQ,35856
 bisheng_langchain/vectorstores/retriever.py,sha256=hj4nAAl352EV_ANnU2OHJn7omCH3nBK82ydo14KqMH4,4353
-bisheng_langchain-0.2.3.dist-info/METADATA,sha256=aS_6DGfFwxgfbvWFt84F7GZwRlm9PQ0b4gUfjmeiFPY,2297
-bisheng_langchain-0.2.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-bisheng_langchain-0.2.3.dist-info/top_level.txt,sha256=Z6pPNyCo4ihyr9iqGQbH8sJiC4dAUwA_mAyGRQB5_Fs,18
-bisheng_langchain-0.2.3.dist-info/RECORD,,
+bisheng_langchain-0.2.3.2.dist-info/METADATA,sha256=bBbyzXtoG8QPpc1m68dlwZBKBZffry7sCq_BY68h7fU,2299
+bisheng_langchain-0.2.3.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+bisheng_langchain-0.2.3.2.dist-info/top_level.txt,sha256=Z6pPNyCo4ihyr9iqGQbH8sJiC4dAUwA_mAyGRQB5_Fs,18
+bisheng_langchain-0.2.3.2.dist-info/RECORD,,
--- a/bisheng_langchain-0.2.3.dist-info/WHEEL
+++ b/bisheng_langchain-0.2.3.2.dist-info/WHEEL
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.42.0)
+Generator: bdist_wheel (0.43.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 