bisheng-langchain 0.3.4.dev3__py3-none-any.whl → 0.3.6.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bisheng_langchain/chains/qa_generation/base_v2.py +14 -33
- bisheng_langchain/document_loaders/elem_unstrcutured_loader.py +24 -5
- bisheng_langchain/gpts/tools/api_tools/sina.py +8 -2
- bisheng_langchain/rag/init_retrievers/keyword_retriever.py +1 -1
- bisheng_langchain/text_splitter.py +68 -56
- bisheng_langchain/vectorstores/elastic_keywords_search.py +45 -1
- bisheng_langchain/vectorstores/milvus.py +14 -3
- {bisheng_langchain-0.3.4.dev3.dist-info → bisheng_langchain-0.3.6.dev1.dist-info}/METADATA +2 -2
- {bisheng_langchain-0.3.4.dev3.dist-info → bisheng_langchain-0.3.6.dev1.dist-info}/RECORD +11 -11
- {bisheng_langchain-0.3.4.dev3.dist-info → bisheng_langchain-0.3.6.dev1.dist-info}/WHEEL +0 -0
- {bisheng_langchain-0.3.4.dev3.dist-info → bisheng_langchain-0.3.6.dev1.dist-info}/top_level.txt +0 -0
bisheng_langchain/chains/qa_generation/base_v2.py

@@ -134,8 +134,6 @@ class TrainsetGenerator:
         chunk_size: int = 1024,
         seed: int = 42,
         prompt: Optional[ChatPromptTemplate] = SEED_QUESTION_CHAT_PROMPT,
-        filter_lowquality_context: bool = False,
-        filter_lowquality_question: bool = False,
         answer_prompt: Optional[HumanMessagePromptTemplate] = ANSWER_FORMULATE,
     ) -> None:
         self.generator_llm = generator_llm
@@ -154,8 +152,6 @@ class TrainsetGenerator:
         self.threshold = 5.0
         self.rng = default_rng(seed)
         self.prompt = prompt
-        self.filter_lowquality_context = filter_lowquality_context
-        self.filter_lowquality_question = filter_lowquality_question
         if answer_prompt is None:
             answer_prompt = ANSWER_FORMULATE
         self.answer_prompt = answer_prompt
@@ -167,8 +163,6 @@ class TrainsetGenerator:
         chunk_size: int = 512,
         trainset_distribution: dict = DEFAULT_TRAIN_DISTRIBUTION,
         prompt: Optional[ChatPromptTemplate] = SEED_QUESTION_CHAT_PROMPT,
-        filter_lowquality_context: bool = False,
-        filter_lowquality_question: bool = False,
         answer_prompt: Optional[PromptTemplate] = ANSWER_FORMULATE,
     ):
         generator_llm = llm
@@ -179,8 +173,6 @@ class TrainsetGenerator:
             chunk_size=chunk_size,
             trainset_distribution=trainset_distribution,
             prompt=prompt,
-            filter_lowquality_context=filter_lowquality_context,
-            filter_lowquality_question=filter_lowquality_question,
             answer_prompt=answer_prompt,
         )
 
@@ -324,17 +316,14 @@ class TrainsetGenerator:
             )
 
             text_chunk = " ".join([node.get_content() for node in nodes])
-            if self.filter_lowquality_context:
-                score = self._filter_context(text_chunk)
-                if not score:
-                    continue
+            score = self._filter_context(text_chunk)
+            if not score:
+                continue
             seed_question = self._seed_question(text_chunk)
 
             question = seed_question
-            if self.filter_lowquality_question:
-                is_valid_question = self._filter_question(question)
-            else:
-                is_valid_question = True
+            # is_valid_question = self._filter_question(question)
+            is_valid_question = True
             if is_valid_question:
                 context = [text_chunk] * len(question.split("\n"))
                 is_conv = len(context) > 1
@@ -372,8 +361,6 @@ class QAGenerationChainV2(Chain):
         llm: BaseLanguageModel,
         k: Optional[int] = None,
         chunk_size: int = 512,
-        filter_lowquality_context: bool = False,
-        filter_lowquality_question: bool = False,
         question_prompt: Optional[ChatPromptTemplate] = SEED_QUESTION_CHAT_PROMPT,
         answer_prompt: Optional[HumanMessagePromptTemplate] = ANSWER_FORMULATE,
         **kwargs: Any,
@@ -390,14 +377,8 @@ class QAGenerationChainV2(Chain):
         Returns:
             a QAGenerationChain class
         """
-        generator = TrainsetGenerator.from_default(
-            llm,
-            chunk_size=chunk_size,
-            prompt=question_prompt,
-            answer_prompt=answer_prompt,
-            filter_lowquality_context=filter_lowquality_context,
-            filter_lowquality_question=filter_lowquality_question
-        )
+        generator = TrainsetGenerator.from_default(llm, chunk_size=chunk_size, prompt=question_prompt,
+                                                   answer_prompt=answer_prompt)
         return cls(documents=documents, generator=generator, k=k, **kwargs)
 
     @property
@@ -424,14 +405,14 @@ class QAGenerationChainV2(Chain):
         dataset = self.generator.generate(documents=self.documents, train_size=self.k)
         df = dataset.to_pandas()
         qa_pairs = df.to_dict("records")
-        qa = …
+        qa = ''
         for pair in qa_pairs:
-            qa.…
-            … (5 removed lines not shown)
+            qa += json.dumps(
+                {
+                    "question": pair["question"],
+                    "answer": pair["ground_truth"][0],
+                    "context": pair["ground_truth_context"][0],
+                }, ensure_ascii=False)
         return {self.output_key: qa}
 
     async def _acall(
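For orientation, the new `_call` body serializes each generated record as a standalone JSON object and concatenates the results into one string. A minimal sketch of that behaviour, using a hypothetical record shaped like the generator's DataFrame rows:

```python
import json

# Hypothetical record shaped like one row of the generator's DataFrame.
qa_pairs = [{
    'question': 'What does the loader return?',
    'ground_truth': ['A list with a single Document.'],
    'ground_truth_context': ['The loader merges partitions and returns [doc].'],
}]

qa = ''
for pair in qa_pairs:
    # One JSON object per pair, appended back-to-back; ensure_ascii=False keeps
    # non-ASCII (e.g. Chinese) text readable instead of \u-escaped.
    qa += json.dumps(
        {
            'question': pair['question'],
            'answer': pair['ground_truth'][0],
            'context': pair['ground_truth_context'][0],
        }, ensure_ascii=False)
```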
bisheng_langchain/document_loaders/elem_unstrcutured_loader.py

@@ -34,7 +34,10 @@ def merge_partitions(partitions):
         elif label == 'Table':
             doc_content.append('\n\n' + text)
         else:
-            …
+            if last_label == 'Table':
+                doc_content.append(text_elem_sep * 2 + text)
+            else:
+                doc_content.append(text_elem_sep + text)
 
         last_label = label
         metadata['bboxes'].extend(list(map(lambda x: list(map(int, x)), extra_data['bboxes'])))
@@ -73,6 +76,7 @@ class ElemUnstructuredLoader(BasePDFLoader):
         self.start = start
         self.n = n
         self.extra_kwargs = kwargs
+        self.partitions = None
         super().__init__(file_path)
 
     def load(self) -> List[Document]:
@@ -93,18 +97,33 @@ class ElemUnstructuredLoader(BasePDFLoader):
         resp = resp.json()
         if 200 != resp.get('status_code'):
             logger.info(f'file partition {os.path.basename(self.file_name)} error resp={resp}')
+            raise Exception(f'file partition error {os.path.basename(self.file_name)} error resp={resp}')
         partitions = resp['partitions']
-        if …
-            logger.info(f'…
+        if partitions:
+            logger.info(f'content_from_partitions')
+            self.partitions = partitions
+            content, metadata = merge_partitions(partitions)
+        elif resp.get('text'):
+            logger.info(f'content_from_text')
+            content = resp['text']
+            metadata = {
+                "bboxes": [],
+                "pages": [],
+                "indexes": [],
+                "types": [],
+            }
+        else:
+            logger.warning(f'content_is_empty resp={resp}')
+            content = ''
+            metadata = {}
+
         logger.info(f'unstruct_return code={resp.get("status_code")}')
 
         if resp.get('b64_pdf'):
             with open(self.file_path, 'wb') as f:
                 f.write(base64.b64decode(resp['b64_pdf']))
 
-        content, metadata = merge_partitions(partitions)
         metadata['source'] = self.file_name
-        …
         doc = Document(page_content=content, metadata=metadata)
         return [doc]
 
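The reworked `load` now prefers structured partitions, falls back to the service's plain `text`, and finally degrades to an empty document instead of failing outright. A small sketch of that resolution order (assumes a response dict shaped like the one in the diff; `merge_partitions` is passed in as a callback so the snippet stands alone):

```python
from typing import Callable, Dict, Tuple

def resolve_content(resp: Dict, merge_partitions: Callable) -> Tuple[str, Dict]:
    if resp.get('partitions'):
        # Preferred path: structured partitions carrying layout metadata.
        return merge_partitions(resp['partitions'])
    if resp.get('text'):
        # Fallback: plain text with empty layout metadata.
        return resp['text'], {'bboxes': [], 'pages': [], 'indexes': [], 'types': []}
    # Nothing usable came back from the service.
    return '', {}
```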
bisheng_langchain/gpts/tools/api_tools/sina.py

@@ -145,7 +145,10 @@ class StockInfo(APIToolBase):
         if resp.status_code != 200:
             logger.info('api_call_fail res={}', resp.text)
         k_data = resp.text
-        …
+        k_data = kLinePattern.search(k_data)
+        if not k_data:
+            return '{}'
+        data_array = json.loads(k_data.group(1))
         for item in data_array:
             if item.get('day') == date:
                 return json.dumps(item)
@@ -173,7 +176,10 @@ class StockInfo(APIToolBase):
         count = datetime.today() - date_obj
         url = self.url.format(stockName=stock_number, stock=stock, count=count.days)
         k_data = await self.async_client.aget(url)
-        …
+        k_data = kLinePattern.search(k_data)
+        if not k_data:
+            return '{}'
+        data_array = json.loads(k_data.group(1))
         for item in data_array:
             if item.get('day') == date:
                 return json.dumps(item)
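Both branches now extract the embedded k-line array with a regex before parsing it, and return an empty JSON object when nothing matches. A standalone sketch of that guard (the pattern below is only a stand-in; the real `kLinePattern` is defined elsewhere in sina.py):

```python
import json
import re

# Stand-in for sina.py's kLinePattern: capture a JSON array embedded in the response body.
kLinePattern = re.compile(r'(\[.*\])', re.S)

def pick_day(raw: str, date: str) -> str:
    match = kLinePattern.search(raw)
    if not match:
        # No k-line payload in the response.
        return '{}'
    data_array = json.loads(match.group(1))
    for item in data_array:
        if item.get('day') == date:
            return json.dumps(item)
    return '{}'  # sketch default when the requested day is absent

print(pick_day('var data=([{"day": "2024-05-06", "close": "10.2"}]);', '2024-05-06'))
```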
bisheng_langchain/rag/init_retrievers/keyword_retriever.py

@@ -16,7 +16,7 @@ from langchain.text_splitter import TextSplitter
 
 
 class KeywordRetriever(BaseRetriever):
-    keyword_store: …
+    keyword_store: ElasticKeywordsSearch
     text_splitter: TextSplitter
     search_type: str = 'similarity'
     search_kwargs: dict = Field(default_factory=dict)
bisheng_langchain/text_splitter.py

@@ -21,17 +21,22 @@ logger = logging.getLogger(__name__)
 
 
 def _split_text_with_regex(
-    text: str, separator: str, keep_separator: bool
+    text: str, separator: str, keep_separator: bool, separator_rule: str
 ) -> List[str]:
     # Now that we have the separator, split the text
     if separator:
         if keep_separator:
             # The parentheses in the pattern keep the delimiters in the result.
             _splits = re.split(f'({separator})', text)
-            splits = [_splits[i] + _splits[i + 1] for i in range(1, len(_splits), 2)]
-            if len(_splits) % 2 == 0:
-                splits += _splits[-1:]
-            splits = [_splits[0]] + splits
+
+            if separator_rule == "before":
+                splits = [_splits[i] + _splits[i + 1] for i in range(1, len(_splits), 2)]
+                if len(_splits) % 2 == 0:
+                    splits += _splits[-1:]
+                splits = [_splits[0]] + splits
+            else:
+                splits = [_splits[i-1] + _splits[i] for i in range(1, len(_splits), 2)]
+                splits = splits + [_splits[-1]]
         else:
             splits = re.split(separator, text)
     else:
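The extra `separator_rule` argument controls which side of the split keeps the matched separator: 'after' glues it onto the end of the preceding chunk, 'before' onto the start of the following one. A self-contained sketch of the new branch with two worked examples (the final filtering of empty strings mirrors what the surrounding splitter code does later):

```python
import re

def split_keep_separator(text: str, separator: str, separator_rule: str):
    """Standalone sketch of the new keep_separator behaviour in _split_text_with_regex."""
    _splits = re.split(f'({separator})', text)
    if separator_rule == 'before':
        # Glue each separator onto the chunk that follows it.
        splits = [_splits[i] + _splits[i + 1] for i in range(1, len(_splits), 2)]
        if len(_splits) % 2 == 0:
            splits += _splits[-1:]
        splits = [_splits[0]] + splits
    else:
        # Default: glue each separator onto the chunk that precedes it.
        splits = [_splits[i - 1] + _splits[i] for i in range(1, len(_splits), 2)]
        splits = splits + [_splits[-1]]
    return [s for s in splits if s != '']

print(split_keep_separator('第一章 总则。第二条 定义。', '。', 'after'))
# ['第一章 总则。', '第二条 定义。']
print(split_keep_separator('1. 第一条 2. 第二条', r'\d\. ', 'before'))
# ['1. 第一条 ', '2. 第二条']
```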
@@ -82,11 +87,14 @@ class ElemCharacterTextSplitter(RecursiveCharacterTextSplitter):
     """
     todo
     """
+
     def __init__(
-        … (4 removed lines not shown)
+        self,
+        separators: Optional[List[str]] = None,
+        separator_rule: Optional[List[str]] = None,
+        is_separator_regex: bool = False,
+        keep_separator: bool = True,
+        **kwargs: Any,
     ) -> None:
         """Create a new TextSplitter."""
         super().__init__(
@@ -95,7 +103,9 @@ class ElemCharacterTextSplitter(RecursiveCharacterTextSplitter):
             **kwargs
         )
         self._separators = separators or ['\n\n', '\n', ' ', '']
-        self.…
+        self._separator_rule = separator_rule or ['after' for _ in range(4)]
+        self.separator_rule = {one: self._separator_rule[index] for index, one in enumerate(separators)}
+        self._is_separator_regex = is_separator_regex
 
     def split_documents(self, documents: Iterable[Document]) -> List[Document]:
         texts, metadatas = [], []
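Put together, the splitter can now be given one rule per separator. A hypothetical construction (argument names follow the diff; `chunk_size` and `chunk_overlap` are the usual RecursiveCharacterTextSplitter kwargs and are only illustrative):

```python
from bisheng_langchain.text_splitter import ElemCharacterTextSplitter

splitter = ElemCharacterTextSplitter(
    separators=['\n\n', '\n', '。', '第.{1,3}条'],
    # Keep the matched separator at the end of the preceding chunk ('after')
    # or at the start of the following chunk ('before').
    separator_rule=['after', 'after', 'after', 'before'],
    is_separator_regex=True,
    chunk_size=512,
    chunk_overlap=32,
)
chunks = splitter.split_text('第一条 甲方应当……\n\n第二条 乙方应当……')
```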
@@ -110,19 +120,21 @@ class ElemCharacterTextSplitter(RecursiveCharacterTextSplitter):
         final_chunks = []
         # Get appropriate separator to use
         separator = separators[-1]
+        separator_rule = 'after'
         new_separators = []
         for i, _s in enumerate(separators):
             _separator = _s if self._is_separator_regex else re.escape(_s)
+            separator_rule = self.separator_rule[_s]
             if _s == '':
                 separator = _s
                 break
             if re.search(_separator, text):
                 separator = _s
-                new_separators = separators[i + 1…
+                new_separators = separators[i + 1:]
                 break
 
         _separator = separator if self._is_separator_regex else re.escape(separator)
-        splits = _split_text_with_regex(text, _separator, self._keep_separator)
+        splits = _split_text_with_regex(text, _separator, self._keep_separator, separator_rule)
 
         # Now go merging things, recursively splitting longer texts.
         _good_splits = []
@@ -149,60 +161,60 @@ class ElemCharacterTextSplitter(RecursiveCharacterTextSplitter):
         return self._split_text(text, self._separators)
 
     def create_documents(
-        …
+        self, texts: List[str], metadatas: Optional[List[dict]] = None
     ) -> List[Document]:
         """Create documents from a list of texts."""
         documents = []
         for i, text in enumerate(texts):
             index = -1
             # metadata = copy.deepcopy(_metadatas[i])
-            indexes = metadatas[i]…
-            pages = metadatas[i]…
-            types = metadatas[i]…
-            bboxes = metadatas[i]…
+            indexes = metadatas[i].get('indexes', [])
+            pages = metadatas[i].get('pages', [])
+            types = metadatas[i].get('types', [])
+            bboxes = metadatas[i].get('bboxes', [])
             searcher = IntervalSearch(indexes)
             split_texts = self.split_text(text)
             for chunk in split_texts:
                 new_metadata = copy.deepcopy(metadatas[i])
-                … (39 removed lines not shown)
+                if indexes and bboxes:
+                    index = text.find(chunk, index + 1)
+                    inter0 = [index, index + len(chunk) - 1]
+                    norm_inter = searcher.find(inter0)
+                    new_metadata['chunk_bboxes'] = []
+                    for j in range(norm_inter[0], norm_inter[1] + 1):
+                        new_metadata['chunk_bboxes'].append(
+                            {'page': pages[j], 'bbox': bboxes[j]})
+
+                    c = Counter([types[j] for j in norm_inter])
+                    chunk_type = c.most_common(1)[0][0]
+                    new_metadata['chunk_type'] = chunk_type
+                    new_metadata['source'] = metadatas[i].get('source', '')
+
+                # for chunk in split_texts:
+                #     new_metadata = {}
+                #     new_metadata['chunk_type'] = metadata.get('chunk_type', 'paragraph')
+                #     new_metadata['bboxes'] = metadata.get('bboxes', [])
+                #     new_metadata['source'] = metadata.get('source', '')
+                #     # chunk's start index in text
+                #     index = text.find(chunk, index + 1)
+                #     new_metadata['start'] = metadata.get('start', 0) + index
+                #     new_metadata['end'] = metadata.get('start', 0) + index + len(chunk) - 1
+
+                #     if 'page' in metadata:
+                #         new_metadata['page'] = metadata['page'][new_metadata['start']:new_metadata['end']+1]
+                #     if 'token_to_bbox' in metadata:
+                #         new_metadata['token_to_bbox'] = metadata['token_to_bbox'][new_metadata['start']:new_metadata['end']+1]
+
+                #     if 'page' in new_metadata and 'token_to_bbox' in new_metadata:
+                #         box_no_duplicates = set()
+                #         for index in range(len(new_metadata['page'])):
+                #             box_no_duplicates.add(
+                #                 (new_metadata['page'][index], new_metadata['token_to_bbox'][index]))
+
+                #         new_metadata['chunk_bboxes'] = []
+                #         for elem in box_no_duplicates:
+                #             new_metadata['chunk_bboxes'].append(
+                #                 {'page': elem[0], 'bbox': new_metadata['bboxes'][elem[1]]})
 
                 new_doc = Document(page_content=chunk, metadata=new_metadata)
                 documents.append(new_doc)
bisheng_langchain/vectorstores/elastic_keywords_search.py

@@ -13,6 +13,7 @@ from langchain.llms.base import BaseLLM
 from langchain.prompts.prompt import PromptTemplate
 from langchain.utils import get_from_dict_or_env
 from langchain.vectorstores.base import VectorStore
+from loguru import logger
 
 if TYPE_CHECKING:
     from elasticsearch import Elasticsearch  # noqa: F401
@@ -326,6 +327,49 @@ class ElasticKeywordsSearch(VectorStore, ABC):
         response = client.search(index=index_name, body={'query': script_query, 'size': size})
         return response
 
-    def …
+    def delete_index(self, **kwargs: Any) -> None:
         # TODO: Check if this can be done in bulk
         self.client.indices.delete(index=self.index_name)
+
+    def delete(
+        self,
+        ids: Optional[List[str]] = None,
+        refresh_indices: Optional[bool] = True,
+        **kwargs: Any,
+    ) -> Optional[bool]:
+        """Delete documents from the Elasticsearch index.
+
+        Args:
+            ids: List of ids of documents to delete.
+            refresh_indices: Whether to refresh the index
+                after deleting documents. Defaults to True.
+        """
+        try:
+            from elasticsearch.helpers import BulkIndexError, bulk
+        except ImportError:
+            raise ImportError('Could not import elasticsearch python package. '
+                              'Please install it with `pip install elasticsearch`.')
+
+        body = []
+
+        if ids is None:
+            raise ValueError('ids must be provided.')
+
+        for _id in ids:
+            body.append({'_op_type': 'delete', '_index': self.index_name, '_id': _id})
+
+        if len(body) > 0:
+            try:
+                bulk(self.client, body, refresh=refresh_indices, ignore_status=404)
+                logger.debug(f'Deleted {len(body)} texts from index')
+
+                return True
+            except BulkIndexError as e:
+                logger.error(f'Error deleting texts: {e}')
+                firstError = e.errors[0].get('index', {}).get('error', {})
+                logger.error(f"First error reason: {firstError.get('reason')}")
+                raise e
+
+        else:
+            logger.debug('No texts to delete from index')
+            return False
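A hedged usage note for the new `delete` API (assumes an `ElasticKeywordsSearch` instance, here called `keyword_store`, already built against a running Elasticsearch index, and that `add_texts` returns the new document ids per the usual VectorStore contract):

```python
# ids come back from an earlier add_texts call.
ids = keyword_store.add_texts(['clause one', 'clause two'])

# Bulk-delete by id; returns True on success, False when the id list is empty,
# and raises ValueError if ids is None.
keyword_store.delete(ids=ids, refresh_indices=True)
```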
bisheng_langchain/vectorstores/milvus.py

@@ -10,6 +10,7 @@ from langchain.docstore.document import Document
 from langchain.embeddings.base import Embeddings
 from langchain.vectorstores.utils import maximal_marginal_relevance
 from langchain_community.vectorstores.milvus import Milvus as MilvusLangchain
+from pymilvus.exceptions import ConnectionNotExistException
 
 logger = logging.getLogger(__name__)
 
@@ -231,7 +232,7 @@ class Milvus(MilvusLangchain):
         from pymilvus import connections
         connections.remove_connection(using)
 
-    def _create_connection_alias(self, connection_args: dict) -> str:
+    def _create_connection_alias(self, connection_args: dict, personal_alias: str = None) -> str:
         """Create the connection to the Milvus server."""
         from pymilvus import MilvusException, connections
 
@@ -269,7 +270,10 @@ class Milvus(MilvusLangchain):
                 return con[0]
 
         # Generate a new connection if one doesn't exist
-        alias = uuid4().hex
+        if personal_alias:
+            alias = personal_alias
+        else:
+            alias = uuid4().hex
         try:
             connections.connect(alias=alias, **connection_args)
             logger.debug('Created new connection using: %s', alias)
@@ -522,7 +526,14 @@ class Milvus(MilvusLangchain):
             insert_list = [insert_dict[x][i:end] for x in self.fields if x in insert_dict]
             # Insert into the collection.
             try:
-                res…
+                res = self.col.insert(insert_list, timeout=timeout, **kwargs)
+                pks.extend(res.primary_keys)
+            except ConnectionNotExistException as e:
+                logger.warning("retrying connection to milvus")
+                # reconnect to milvus
+                self._create_connection_alias(self.connection_args, self.alias)
+
+                # insert data
                 res = self.col.insert(insert_list, timeout=timeout, **kwargs)
                 pks.extend(res.primary_keys)
             except MilvusException as e:
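The insert path now retries once when the cached pymilvus connection alias has gone away. A generic sketch of that pattern (the real code reuses `Milvus._create_connection_alias` with `self.alias`; here the reconnect step is passed in as a callback):

```python
from pymilvus.exceptions import ConnectionNotExistException

def insert_with_reconnect(col, insert_list, reconnect, timeout=None, **kwargs):
    try:
        res = col.insert(insert_list, timeout=timeout, **kwargs)
    except ConnectionNotExistException:
        # The alias was dropped (e.g. the server closed the connection):
        # re-register it, then retry the insert once.
        reconnect()
        res = col.insert(insert_list, timeout=timeout, **kwargs)
    return res.primary_keys
```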
{bisheng_langchain-0.3.4.dev3.dist-info → bisheng_langchain-0.3.6.dev1.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: bisheng-langchain
-Version: 0.3.4.dev3
+Version: 0.3.6.dev1
 Summary: bisheng langchain modules
 Home-page: https://github.com/dataelement/bisheng
 Author: DataElem
@@ -30,7 +30,7 @@ Requires-Dist: shapely==2.0.2
 Requires-Dist: filetype==1.2.0
 Requires-Dist: langgraph==0.0.50
 Requires-Dist: openai==1.14.3
-Requires-Dist: langchain-openai==0.1.…
+Requires-Dist: langchain-openai==0.1.5
 Requires-Dist: llama-index==0.9.48
 Requires-Dist: bisheng-ragas==1.0.0
 
{bisheng_langchain-0.3.4.dev3.dist-info → bisheng_langchain-0.3.6.dev1.dist-info}/RECORD

@@ -1,5 +1,5 @@
 bisheng_langchain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-bisheng_langchain/text_splitter.py,sha256=…
+bisheng_langchain/text_splitter.py,sha256=yYpzMa0c1tRZNGyuFFJsLvSeqberYHV2-BR28pQim8I,8794
 bisheng_langchain/agents/__init__.py,sha256=ctsKj77fS8qlkhz_9sS_AhCjFvFNxEpJ9KBYVrApLRg,226
 bisheng_langchain/agents/chatglm_functions_agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/agents/chatglm_functions_agent/base.py,sha256=tyytq0XIFXpfxDP0s5QKeprKOunMqi1fHMfQ0-kOmDE,13674
@@ -23,7 +23,7 @@ bisheng_langchain/chains/conversational_retrieval/__init__.py,sha256=47DEQpj8HBS
 bisheng_langchain/chains/conversational_retrieval/base.py,sha256=XiqBqov6No-wTVCou6qyMT5p2JQgoQI7OLQOYH8XUos,5313
 bisheng_langchain/chains/qa_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/chains/qa_generation/base.py,sha256=VYGmLDB0bnlDQ6T8ivLP55wwFbMo9HOzlPEDUuRx5fU,4148
-bisheng_langchain/chains/qa_generation/base_v2.py,sha256=…
+bisheng_langchain/chains/qa_generation/base_v2.py,sha256=ZtHEuNFwbE9txCGR3wx0oDAoj9V6bAxi3GXF8Z78cqQ,14580
 bisheng_langchain/chains/qa_generation/prompt.py,sha256=4eJk9aDUYDN1qaaYRPy9EobCIncnwS8BbQaDFzzePtM,1944
 bisheng_langchain/chains/qa_generation/prompt_v2.py,sha256=sQLanA_iOnLqrUIwzfTOTANt-1vJ44CM54HFDU8Jo1Q,8938
 bisheng_langchain/chains/question_answering/__init__.py,sha256=_gOZMc-SWprK6xc-Jj64jcr9nc-G4YkZbEYwfJNq_bY,8795
@@ -54,7 +54,7 @@ bisheng_langchain/document_loaders/custom_kv.py,sha256=xWUPhcr1hjbdya4zgEHG4Fl0s
 bisheng_langchain/document_loaders/elem_html.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/document_loaders/elem_image.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/document_loaders/elem_pdf.py,sha256=K-TXILGNFLFjavhun_MFbUF4t2_WGA3Z-kbnr75lmW8,22243
-bisheng_langchain/document_loaders/elem_unstrcutured_loader.py,sha256=…
+bisheng_langchain/document_loaders/elem_unstrcutured_loader.py,sha256=JW87AhzCY_KS_YYszyxU3GgPjxP4vWOHDfifJEpP5CI,8055
 bisheng_langchain/document_loaders/universal_kv.py,sha256=ZdIgFIc2fH2kkvJNb7j2wi6FLS_PaaatVy6z_YNV2hw,4114
 bisheng_langchain/document_loaders/parsers/__init__.py,sha256=OOM_FJkwaU-zNS58fASw0TH8FNT6VXKb0VrvisgdrII,171
 bisheng_langchain/document_loaders/parsers/ellm_client.py,sha256=Y_CRYwBr-gFArOirF1b76KyI5N8eVpsLeDiIsKtYkpU,1641
@@ -93,7 +93,7 @@ bisheng_langchain/gpts/tools/api_tools/base.py,sha256=fWQSDIOVb4JZrtJ9ML9q2ycsAa
 bisheng_langchain/gpts/tools/api_tools/flow.py,sha256=ot2YAYgQGWgUpb2nCECAmpqHY6m0SgzwkupF9kDT3lU,2461
 bisheng_langchain/gpts/tools/api_tools/macro_data.py,sha256=FyG-qtl2ECS1CDKt6olN0eDTDM91d-UvDkMDBiVLgYQ,27429
 bisheng_langchain/gpts/tools/api_tools/openapi.py,sha256=CzKt9FRkgngBcWgabD4emPqAXkAgagkD-pMjG680MTE,3903
-bisheng_langchain/gpts/tools/api_tools/sina.py,sha256=…
+bisheng_langchain/gpts/tools/api_tools/sina.py,sha256=4KpK7_HUUtjpdJ-K4LjPlb-occyAZcRtmmCWqJ2BotE,9708
 bisheng_langchain/gpts/tools/api_tools/tianyancha.py,sha256=abDAz-yAH1-2rKiSmZ6TgnrNUnpgAZpDY8oDiWfWapc,6684
 bisheng_langchain/gpts/tools/bing_search/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/gpts/tools/bing_search/tool.py,sha256=v_VlqcMplITA5go5qWA4qZ5p43E1-1s0bzmyY7H0hqY,1710
@@ -126,7 +126,7 @@ bisheng_langchain/rag/config/baseline_s2b_mix.yaml,sha256=rkPfzU2-mvjRrZ0zMHaQsn
 bisheng_langchain/rag/config/baseline_v2.yaml,sha256=RP-DwIRIS_ZK8ixbXi2Z28rKqHD56pWmr2o2WWIwq3Y,2382
 bisheng_langchain/rag/init_retrievers/__init__.py,sha256=qpLLAuqZPtumTlJj17Ie5AbDDmiUiDxYefg_pumqu-c,218
 bisheng_langchain/rag/init_retrievers/baseline_vector_retriever.py,sha256=oRKZZpxlLQAtsubIcAXeXpf1a9h6Pt6uOtNTLeD2jps,2362
-bisheng_langchain/rag/init_retrievers/keyword_retriever.py,sha256=…
+bisheng_langchain/rag/init_retrievers/keyword_retriever.py,sha256=NRT0fBx6HFR7j9IbRl_NBuqF7hnL-9v5GCqHpgnrfPQ,2523
 bisheng_langchain/rag/init_retrievers/mix_retriever.py,sha256=Whxq4kjNPLsxnHcVo60usdFFwLTCD-1jO38q08LXkVQ,4653
 bisheng_langchain/rag/init_retrievers/smaller_chunks_retriever.py,sha256=RQ7QLEOOhBrkw-EimXVJqIGa96D-KkNDik2h9hzg9fU,3805
 bisheng_langchain/rag/prompts/__init__.py,sha256=IUCq9gzqGQN_6IDk0D_F5t3mOUI_KbmSzYnnXoX4VKE,223
@@ -150,10 +150,10 @@ bisheng_langchain/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG
 bisheng_langchain/utils/azure_dalle_image_generator.py,sha256=96-_nO4hDSwyPE4rSYop5SgJ-U9CE2un4bTdW0E5RGU,6582
 bisheng_langchain/utils/requests.py,sha256=vWGKyNTxApVeaVdKxqACfIT1Q8wMy-jC3kUv2Ce9Mzc,8688
 bisheng_langchain/vectorstores/__init__.py,sha256=zCZgDe7LyQ0iDkfcm5UJ5NxwKQSRHnqrsjx700Fy11M,213
-bisheng_langchain/vectorstores/elastic_keywords_search.py,sha256=…
-bisheng_langchain/vectorstores/milvus.py,sha256=…
+bisheng_langchain/vectorstores/elastic_keywords_search.py,sha256=inZarhahRaesrvLqyeRCMQvHGAASY53opEVA0_o8S14,14901
+bisheng_langchain/vectorstores/milvus.py,sha256=xh7NokraKg_Xc9ofz0RVfJ_I36ftnprLJtV-1NfaeyQ,37162
 bisheng_langchain/vectorstores/retriever.py,sha256=hj4nAAl352EV_ANnU2OHJn7omCH3nBK82ydo14KqMH4,4353
-bisheng_langchain-0.3.4.dev3.dist-info/METADATA,sha256=…
-bisheng_langchain-0.3.4.dev3.dist-info/WHEEL,sha256=…
-bisheng_langchain-0.3.4.dev3.dist-info/top_level.txt,sha256=…
-bisheng_langchain-0.3.4.dev3.dist-info/RECORD,,
+bisheng_langchain-0.3.6.dev1.dist-info/METADATA,sha256=KG32YRknnVoAxFzVKE_qMMQBjbhZen046fXQYyhXQvs,2476
+bisheng_langchain-0.3.6.dev1.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+bisheng_langchain-0.3.6.dev1.dist-info/top_level.txt,sha256=Z6pPNyCo4ihyr9iqGQbH8sJiC4dAUwA_mAyGRQB5_Fs,18
+bisheng_langchain-0.3.6.dev1.dist-info/RECORD,,
{bisheng_langchain-0.3.4.dev3.dist-info → bisheng_langchain-0.3.6.dev1.dist-info}/WHEEL
RENAMED
File without changes

{bisheng_langchain-0.3.4.dev3.dist-info → bisheng_langchain-0.3.6.dev1.dist-info}/top_level.txt
RENAMED
File without changes