bisheng-langchain 0.3.4.dev3__py3-none-any.whl → 0.3.6.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -134,8 +134,6 @@ class TrainsetGenerator:
         chunk_size: int = 1024,
         seed: int = 42,
         prompt: Optional[ChatPromptTemplate] = SEED_QUESTION_CHAT_PROMPT,
-        filter_lowquality_context: bool = False,
-        filter_lowquality_question: bool = False,
         answer_prompt: Optional[HumanMessagePromptTemplate] = ANSWER_FORMULATE,
     ) -> None:
         self.generator_llm = generator_llm
@@ -154,8 +152,6 @@ class TrainsetGenerator:
         self.threshold = 5.0
         self.rng = default_rng(seed)
         self.prompt = prompt
-        self.filter_lowquality_context = filter_lowquality_context
-        self.filter_lowquality_question = filter_lowquality_question
         if answer_prompt is None:
             answer_prompt = ANSWER_FORMULATE
         self.answer_prompt = answer_prompt
@@ -167,8 +163,6 @@ class TrainsetGenerator:
         chunk_size: int = 512,
         trainset_distribution: dict = DEFAULT_TRAIN_DISTRIBUTION,
         prompt: Optional[ChatPromptTemplate] = SEED_QUESTION_CHAT_PROMPT,
-        filter_lowquality_context: bool = False,
-        filter_lowquality_question: bool = False,
         answer_prompt: Optional[PromptTemplate] = ANSWER_FORMULATE,
     ):
         generator_llm = llm
@@ -179,8 +173,6 @@ class TrainsetGenerator:
             chunk_size=chunk_size,
             trainset_distribution=trainset_distribution,
             prompt=prompt,
-            filter_lowquality_context=filter_lowquality_context,
-            filter_lowquality_question=filter_lowquality_question,
             answer_prompt=answer_prompt,
         )
 
@@ -324,17 +316,14 @@ class TrainsetGenerator:
             )
 
             text_chunk = " ".join([node.get_content() for node in nodes])
-            if self.filter_lowquality_context:
-                score = self._filter_context(text_chunk)
-                if not score:
-                    continue
+            score = self._filter_context(text_chunk)
+            if not score:
+                continue
             seed_question = self._seed_question(text_chunk)
 
             question = seed_question
-            if self.filter_lowquality_question:
-                is_valid_question = self._filter_question(question)
-            else:
-                is_valid_question = True
+            # is_valid_question = self._filter_question(question)
+            is_valid_question = True
             if is_valid_question:
                 context = [text_chunk] * len(question.split("\n"))
                 is_conv = len(context) > 1
@@ -372,8 +361,6 @@ class QAGenerationChainV2(Chain):
         llm: BaseLanguageModel,
         k: Optional[int] = None,
         chunk_size: int = 512,
-        filter_lowquality_context: bool = False,
-        filter_lowquality_question: bool = False,
         question_prompt: Optional[ChatPromptTemplate] = SEED_QUESTION_CHAT_PROMPT,
         answer_prompt: Optional[HumanMessagePromptTemplate] = ANSWER_FORMULATE,
         **kwargs: Any,
@@ -390,14 +377,8 @@ class QAGenerationChainV2(Chain):
         Returns:
             a QAGenerationChain class
         """
-        generator = TrainsetGenerator.from_default(
-            llm,
-            chunk_size=chunk_size,
-            prompt=question_prompt,
-            answer_prompt=answer_prompt,
-            filter_lowquality_context=filter_lowquality_context,
-            filter_lowquality_question=filter_lowquality_question
-        )
+        generator = TrainsetGenerator.from_default(llm, chunk_size=chunk_size, prompt=question_prompt,
+                                                   answer_prompt=answer_prompt)
         return cls(documents=documents, generator=generator, k=k, **kwargs)
 
     @property
@@ -424,14 +405,14 @@ class QAGenerationChainV2(Chain):
         dataset = self.generator.generate(documents=self.documents, train_size=self.k)
         df = dataset.to_pandas()
         qa_pairs = df.to_dict("records")
-        qa = []
+        qa = ''
         for pair in qa_pairs:
-            qa.append({
-                "question": pair["question"],
-                "answer": pair["ground_truth"][0],
-                "context": pair["ground_truth_context"][0],
-            })
-        qa = f'```json\n{json.dumps(qa, ensure_ascii=False, indent=4)}\n```'
+            qa += json.dumps(
+                {
+                    "question": pair["question"],
+                    "answer": pair["ground_truth"][0],
+                    "context": pair["ground_truth_context"][0],
+                }, ensure_ascii=False)
         return {self.output_key: qa}
 
     async def _acall(
@@ -34,7 +34,10 @@ def merge_partitions(partitions):
         elif label == 'Table':
             doc_content.append('\n\n' + text)
         else:
-            doc_content.append(text_elem_sep + text)
+            if last_label == 'Table':
+                doc_content.append(text_elem_sep * 2 + text)
+            else:
+                doc_content.append(text_elem_sep + text)
 
         last_label = label
         metadata['bboxes'].extend(list(map(lambda x: list(map(int, x)), extra_data['bboxes'])))
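
A minimal standalone sketch of the separator rule introduced above: text that immediately follows a 'Table' element now gets a doubled `text_elem_sep`, so a table stays visually detached from the paragraph after it. The element tuples and single-character separator are illustrative stand-ins, not the loader's real partition structures.

```python
# Sketch only: mirrors the new branch in merge_partitions with simplified inputs.
def join_elements(elements, text_elem_sep='\n'):
    doc_content = []
    last_label = None
    for label, text in elements:
        if label == 'Table':
            doc_content.append('\n\n' + text)
        else:
            # New behaviour: insert an extra separator when the previous element was a table.
            if last_label == 'Table':
                doc_content.append(text_elem_sep * 2 + text)
            else:
                doc_content.append(text_elem_sep + text)
        last_label = label
    return ''.join(doc_content)

print(join_elements([('Text', 'intro'), ('Table', '|a|b|'), ('Text', 'follow-up')]))
# '\nintro\n\n|a|b|\n\nfollow-up' -- the text after the table is preceded by a blank line
```
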
@@ -73,6 +76,7 @@ class ElemUnstructuredLoader(BasePDFLoader):
         self.start = start
         self.n = n
         self.extra_kwargs = kwargs
+        self.partitions = None
         super().__init__(file_path)
 
     def load(self) -> List[Document]:
@@ -93,18 +97,33 @@ class ElemUnstructuredLoader(BasePDFLoader):
         resp = resp.json()
         if 200 != resp.get('status_code'):
             logger.info(f'file partition {os.path.basename(self.file_name)} error resp={resp}')
+            raise Exception(f'file partition error {os.path.basename(self.file_name)} error resp={resp}')
         partitions = resp['partitions']
-        if not partitions:
-            logger.info(f'partition_error resp={resp}')
+        if partitions:
+            logger.info(f'content_from_partitions')
+            self.partitions = partitions
+            content, metadata = merge_partitions(partitions)
+        elif resp.get('text'):
+            logger.info(f'content_from_text')
+            content = resp['text']
+            metadata = {
+                "bboxes": [],
+                "pages": [],
+                "indexes": [],
+                "types": [],
+            }
+        else:
+            logger.warning(f'content_is_empty resp={resp}')
+            content = ''
+            metadata = {}
+
         logger.info(f'unstruct_return code={resp.get("status_code")}')
 
         if resp.get('b64_pdf'):
             with open(self.file_path, 'wb') as f:
                 f.write(base64.b64decode(resp['b64_pdf']))
 
-        content, metadata = merge_partitions(partitions)
         metadata['source'] = self.file_name
-
         doc = Document(page_content=content, metadata=metadata)
         return [doc]
 
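
The intent of the loader change above, condensed into a hedged sketch: prefer structured `partitions`, fall back to the service's plain `text` (with empty layout metadata), and only then return empty content. Here `resp` is the parsed JSON response and `merge_partitions` is passed in rather than imported, since this is not the loader's exact code.

```python
from typing import Callable, Dict, Tuple

def resolve_content(resp: Dict, merge_partitions: Callable) -> Tuple[str, Dict]:
    """Pick the content source in the same priority order as the new load()."""
    if resp.get('partitions'):
        return merge_partitions(resp['partitions'])   # layout-aware path
    if resp.get('text'):
        empty_layout = {'bboxes': [], 'pages': [], 'indexes': [], 'types': []}
        return resp['text'], empty_layout             # plain-text fallback, no layout metadata
    return '', {}                                     # nothing usable in the response
```
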
 
@@ -145,7 +145,10 @@ class StockInfo(APIToolBase):
145
145
  if resp.status_code != 200:
146
146
  logger.info('api_call_fail res={}', resp.text)
147
147
  k_data = resp.text
148
- data_array = json.loads(kLinePattern.search(k_data).group(1))
148
+ k_data = kLinePattern.search(k_data)
149
+ if not k_data:
150
+ return '{}'
151
+ data_array = json.loads(k_data.group(1))
149
152
  for item in data_array:
150
153
  if item.get('day') == date:
151
154
  return json.dumps(item)
@@ -173,7 +176,10 @@ class StockInfo(APIToolBase):
         count = datetime.today() - date_obj
         url = self.url.format(stockName=stock_number, stock=stock, count=count.days)
         k_data = await self.async_client.aget(url)
-        data_array = json.loads(kLinePattern.search(k_data).group(1))
+        k_data = kLinePattern.search(k_data)
+        if not k_data:
+            return '{}'
+        data_array = json.loads(k_data.group(1))
         for item in data_array:
             if item.get('day') == date:
                 return json.dumps(item)
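
Both sina.py call sites now apply the same guard: check the regex match object before calling `.group()`, so an unexpected response yields an empty JSON object instead of raising `AttributeError`. A compact sketch of the pattern, using a stand-in regex because the real `kLinePattern` is defined outside this diff:

```python
import json
import re

# Stand-in pattern; the real kLinePattern lives elsewhere in sina.py.
K_LINE_PATTERN = re.compile(r'=\((\[.*\])\)', re.S)

def pick_day(payload: str, date: str) -> str:
    match = K_LINE_PATTERN.search(payload)
    if not match:                      # malformed or empty response: degrade gracefully
        return '{}'
    for item in json.loads(match.group(1)):
        if item.get('day') == date:
            return json.dumps(item)
    return '{}'
```
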
@@ -16,7 +16,7 @@ from langchain.text_splitter import TextSplitter
 
 
 class KeywordRetriever(BaseRetriever):
-    keyword_store: VectorStore
+    keyword_store: ElasticKeywordsSearch
     text_splitter: TextSplitter
     search_type: str = 'similarity'
     search_kwargs: dict = Field(default_factory=dict)
@@ -21,17 +21,22 @@ logger = logging.getLogger(__name__)
 
 
 def _split_text_with_regex(
-    text: str, separator: str, keep_separator: bool
+    text: str, separator: str, keep_separator: bool, separator_rule: str
 ) -> List[str]:
     # Now that we have the separator, split the text
     if separator:
         if keep_separator:
             # The parentheses in the pattern keep the delimiters in the result.
             _splits = re.split(f'({separator})', text)
-            splits = [_splits[i] + _splits[i + 1] for i in range(1, len(_splits), 2)]
-            if len(_splits) % 2 == 0:
-                splits += _splits[-1:]
-            splits = [_splits[0]] + splits
+
+            if separator_rule == "before":
+                splits = [_splits[i] + _splits[i + 1] for i in range(1, len(_splits), 2)]
+                if len(_splits) % 2 == 0:
+                    splits += _splits[-1:]
+                splits = [_splits[0]] + splits
+            else:
+                splits = [_splits[i-1] + _splits[i] for i in range(1, len(_splits), 2)]
+                splits = splits + [_splits[-1]]
         else:
             splits = re.split(separator, text)
     else:
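
The new `separator_rule` argument decides which side of a chunk the kept separator attaches to: 'before' reproduces the old behaviour (the separator starts the next chunk), while 'after' glues it to the end of the preceding chunk, which reads more naturally for sentence-ending punctuation. A small sketch of the two paths, checked against the list comprehensions above:

```python
import re
from typing import List

def split_keep_separator(text: str, separator: str, separator_rule: str) -> List[str]:
    _splits = re.split(f'({separator})', text)
    if separator_rule == 'before':
        # The separator prefixes the chunk that follows it.
        splits = [_splits[i] + _splits[i + 1] for i in range(1, len(_splits), 2)]
        if len(_splits) % 2 == 0:
            splits += _splits[-1:]
        splits = [_splits[0]] + splits
    else:
        # 'after': the separator stays at the end of the preceding chunk.
        splits = [_splits[i - 1] + _splits[i] for i in range(1, len(_splits), 2)]
        splits = splits + [_splits[-1]]
    return splits

print(split_keep_separator('a。b。c', '。', 'before'))  # ['a', '。b', '。c']
print(split_keep_separator('a。b。c', '。', 'after'))   # ['a。', 'b。', 'c']
```
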
@@ -82,11 +87,14 @@ class ElemCharacterTextSplitter(RecursiveCharacterTextSplitter):
     """
     todo
     """
+
     def __init__(
-        self,
-        separators: Optional[List[str]] = None,
-        keep_separator: bool = True,
-        **kwargs: Any,
+            self,
+            separators: Optional[List[str]] = None,
+            separator_rule: Optional[List[str]] = None,
+            is_separator_regex: bool = False,
+            keep_separator: bool = True,
+            **kwargs: Any,
     ) -> None:
         """Create a new TextSplitter."""
         super().__init__(
@@ -95,7 +103,9 @@ class ElemCharacterTextSplitter(RecursiveCharacterTextSplitter):
             **kwargs
         )
         self._separators = separators or ['\n\n', '\n', ' ', '']
-        self._is_separator_regex = False
+        self._separator_rule = separator_rule or ['after' for _ in range(4)]
+        self.separator_rule = {one: self._separator_rule[index] for index, one in enumerate(separators)}
+        self._is_separator_regex = is_separator_regex
 
     def split_documents(self, documents: Iterable[Document]) -> List[Document]:
         texts, metadatas = [], []
@@ -110,19 +120,21 @@ class ElemCharacterTextSplitter(RecursiveCharacterTextSplitter):
         final_chunks = []
         # Get appropriate separator to use
         separator = separators[-1]
+        separator_rule = 'after'
         new_separators = []
         for i, _s in enumerate(separators):
             _separator = _s if self._is_separator_regex else re.escape(_s)
+            separator_rule = self.separator_rule[_s]
             if _s == '':
                 separator = _s
                 break
             if re.search(_separator, text):
                 separator = _s
-                new_separators = separators[i + 1 :]
+                new_separators = separators[i + 1:]
                 break
 
         _separator = separator if self._is_separator_regex else re.escape(separator)
-        splits = _split_text_with_regex(text, _separator, self._keep_separator)
+        splits = _split_text_with_regex(text, _separator, self._keep_separator, separator_rule)
 
         # Now go merging things, recursively splitting longer texts.
         _good_splits = []
@@ -149,60 +161,60 @@ class ElemCharacterTextSplitter(RecursiveCharacterTextSplitter):
         return self._split_text(text, self._separators)
 
     def create_documents(
-        self, texts: List[str], metadatas: Optional[List[dict]] = None
+            self, texts: List[str], metadatas: Optional[List[dict]] = None
     ) -> List[Document]:
         """Create documents from a list of texts."""
         documents = []
         for i, text in enumerate(texts):
             index = -1
             # metadata = copy.deepcopy(_metadatas[i])
-            indexes = metadatas[i]['indexes']
-            pages = metadatas[i]['pages']
-            types = metadatas[i]['types']
-            bboxes = metadatas[i]['bboxes']
+            indexes = metadatas[i].get('indexes', [])
+            pages = metadatas[i].get('pages', [])
+            types = metadatas[i].get('types', [])
+            bboxes = metadatas[i].get('bboxes', [])
             searcher = IntervalSearch(indexes)
             split_texts = self.split_text(text)
             for chunk in split_texts:
                 new_metadata = copy.deepcopy(metadatas[i])
-                index = text.find(chunk, index + 1)
-                inter0 = [index, index + len(chunk) - 1]
-                norm_inter = searcher.find(inter0)
-                new_metadata['chunk_bboxes'] = []
-                for j in range(norm_inter[0], norm_inter[1] + 1):
-                    new_metadata['chunk_bboxes'].append(
-                        {'page': pages[j], 'bbox': bboxes[j]})
-
-                c = Counter([types[j] for j in norm_inter])
-                chunk_type = c.most_common(1)[0][0]
-                new_metadata['chunk_type'] = chunk_type
-                new_metadata['source'] = metadatas[i].get('source', '')
-
-
-                # for chunk in split_texts:
-                # new_metadata = {}
-                # new_metadata['chunk_type'] = metadata.get('chunk_type', 'paragraph')
-                # new_metadata['bboxes'] = metadata.get('bboxes', [])
-                # new_metadata['source'] = metadata.get('source', '')
-                # # chunk's start index in text
-                # index = text.find(chunk, index + 1)
-                # new_metadata['start'] = metadata.get('start', 0) + index
-                # new_metadata['end'] = metadata.get('start', 0) + index + len(chunk) - 1
-
-                # if 'page' in metadata:
-                # new_metadata['page'] = metadata['page'][new_metadata['start']:new_metadata['end']+1]
-                # if 'token_to_bbox' in metadata:
-                # new_metadata['token_to_bbox'] = metadata['token_to_bbox'][new_metadata['start']:new_metadata['end']+1]
-
-                # if 'page' in new_metadata and 'token_to_bbox' in new_metadata:
-                # box_no_duplicates = set()
-                # for index in range(len(new_metadata['page'])):
-                # box_no_duplicates.add(
-                # (new_metadata['page'][index], new_metadata['token_to_bbox'][index]))
-
-                # new_metadata['chunk_bboxes'] = []
-                # for elem in box_no_duplicates:
-                # new_metadata['chunk_bboxes'].append(
-                # {'page': elem[0], 'bbox': new_metadata['bboxes'][elem[1]]})
+                if indexes and bboxes:
+                    index = text.find(chunk, index + 1)
+                    inter0 = [index, index + len(chunk) - 1]
+                    norm_inter = searcher.find(inter0)
+                    new_metadata['chunk_bboxes'] = []
+                    for j in range(norm_inter[0], norm_inter[1] + 1):
+                        new_metadata['chunk_bboxes'].append(
+                            {'page': pages[j], 'bbox': bboxes[j]})
+
+                    c = Counter([types[j] for j in norm_inter])
+                    chunk_type = c.most_common(1)[0][0]
+                    new_metadata['chunk_type'] = chunk_type
+                    new_metadata['source'] = metadatas[i].get('source', '')
+
+                    # for chunk in split_texts:
+                    # new_metadata = {}
+                    # new_metadata['chunk_type'] = metadata.get('chunk_type', 'paragraph')
+                    # new_metadata['bboxes'] = metadata.get('bboxes', [])
+                    # new_metadata['source'] = metadata.get('source', '')
+                    # # chunk's start index in text
+                    # index = text.find(chunk, index + 1)
+                    # new_metadata['start'] = metadata.get('start', 0) + index
+                    # new_metadata['end'] = metadata.get('start', 0) + index + len(chunk) - 1
+
+                    # if 'page' in metadata:
+                    # new_metadata['page'] = metadata['page'][new_metadata['start']:new_metadata['end']+1]
+                    # if 'token_to_bbox' in metadata:
+                    # new_metadata['token_to_bbox'] = metadata['token_to_bbox'][new_metadata['start']:new_metadata['end']+1]
+
+                    # if 'page' in new_metadata and 'token_to_bbox' in new_metadata:
+                    # box_no_duplicates = set()
+                    # for index in range(len(new_metadata['page'])):
+                    # box_no_duplicates.add(
+                    # (new_metadata['page'][index], new_metadata['token_to_bbox'][index]))
+
+                    # new_metadata['chunk_bboxes'] = []
+                    # for elem in box_no_duplicates:
+                    # new_metadata['chunk_bboxes'].append(
+                    # {'page': elem[0], 'bbox': new_metadata['bboxes'][elem[1]]})
 
                 new_doc = Document(page_content=chunk, metadata=new_metadata)
                 documents.append(new_doc)
@@ -13,6 +13,7 @@ from langchain.llms.base import BaseLLM
 from langchain.prompts.prompt import PromptTemplate
 from langchain.utils import get_from_dict_or_env
 from langchain.vectorstores.base import VectorStore
+from loguru import logger
 
 if TYPE_CHECKING:
     from elasticsearch import Elasticsearch  # noqa: F401
@@ -326,6 +327,49 @@ class ElasticKeywordsSearch(VectorStore, ABC):
         response = client.search(index=index_name, body={'query': script_query, 'size': size})
         return response
 
-    def delete(self, **kwargs: Any) -> None:
+    def delete_index(self, **kwargs: Any) -> None:
         # TODO: Check if this can be done in bulk
         self.client.indices.delete(index=self.index_name)
+
+    def delete(
+        self,
+        ids: Optional[List[str]] = None,
+        refresh_indices: Optional[bool] = True,
+        **kwargs: Any,
+    ) -> Optional[bool]:
+        """Delete documents from the Elasticsearch index.
+
+        Args:
+            ids: List of ids of documents to delete.
+            refresh_indices: Whether to refresh the index
+                after deleting documents. Defaults to True.
+        """
+        try:
+            from elasticsearch.helpers import BulkIndexError, bulk
+        except ImportError:
+            raise ImportError('Could not import elasticsearch python package. '
+                              'Please install it with `pip install elasticsearch`.')
+
+        body = []
+
+        if ids is None:
+            raise ValueError('ids must be provided.')
+
+        for _id in ids:
+            body.append({'_op_type': 'delete', '_index': self.index_name, '_id': _id})
+
+        if len(body) > 0:
+            try:
+                bulk(self.client, body, refresh=refresh_indices, ignore_status=404)
+                logger.debug(f'Deleted {len(body)} texts from index')
+
+                return True
+            except BulkIndexError as e:
+                logger.error(f'Error deleting texts: {e}')
+                firstError = e.errors[0].get('index', {}).get('error', {})
+                logger.error(f"First error reason: {firstError.get('reason')}")
+                raise e
+
+        else:
+            logger.debug('No texts to delete from index')
+            return False
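
With this split, dropping the whole index is now the explicit `delete_index()`, while `delete(ids=...)` removes individual documents in bulk. A usage sketch under the assumption that `store` is an already-constructed ElasticKeywordsSearch instance; the ids and helper name are illustrative:

```python
from typing import List

def remove_stale_docs(store, stale_ids: List[str]) -> bool:
    """Delete specific documents; True on success, False when there is nothing to delete."""
    if not stale_ids:
        return False
    # Bulk-deletes by _id; missing ids are ignored (ignore_status=404 in the implementation).
    return store.delete(ids=stale_ids, refresh_indices=True)

# Dropping the entire index remains possible, but only via the explicit call:
# store.delete_index()
```
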
@@ -10,6 +10,7 @@ from langchain.docstore.document import Document
 from langchain.embeddings.base import Embeddings
 from langchain.vectorstores.utils import maximal_marginal_relevance
 from langchain_community.vectorstores.milvus import Milvus as MilvusLangchain
+from pymilvus.exceptions import ConnectionNotExistException
 
 logger = logging.getLogger(__name__)
 
@@ -231,7 +232,7 @@ class Milvus(MilvusLangchain):
         from pymilvus import connections
         connections.remove_connection(using)
 
-    def _create_connection_alias(self, connection_args: dict) -> str:
+    def _create_connection_alias(self, connection_args: dict, personal_alias: str = None) -> str:
         """Create the connection to the Milvus server."""
         from pymilvus import MilvusException, connections
 
@@ -269,7 +270,10 @@ class Milvus(MilvusLangchain):
                     return con[0]
 
         # Generate a new connection if one doesn't exist
-        alias = uuid4().hex
+        if personal_alias:
+            alias = personal_alias
+        else:
+            alias = uuid4().hex
         try:
             connections.connect(alias=alias, **connection_args)
             logger.debug('Created new connection using: %s', alias)
@@ -522,7 +526,14 @@ class Milvus(MilvusLangchain):
             insert_list = [insert_dict[x][i:end] for x in self.fields if x in insert_dict]
             # Insert into the collection.
             try:
-                res: Collection
+                res = self.col.insert(insert_list, timeout=timeout, **kwargs)
+                pks.extend(res.primary_keys)
+            except ConnectionNotExistException as e:
+                logger.warning("retrying connection to milvus")
+                # reconnect to milvus
+                self._create_connection_alias(self.connection_args, self.alias)
+
+                # insert data
                 res = self.col.insert(insert_list, timeout=timeout, **kwargs)
                 pks.extend(res.primary_keys)
             except MilvusException as e:
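
The milvus.py change retries an insert once after re-registering the connection alias instead of failing on a stale session. The same pattern in isolation, with `collection` and `reconnect` standing in for `self.col` and `self._create_connection_alias(self.connection_args, self.alias)`:

```python
from pymilvus.exceptions import ConnectionNotExistException

def insert_with_reconnect(collection, reconnect, rows, timeout=None, **kwargs):
    try:
        return collection.insert(rows, timeout=timeout, **kwargs)
    except ConnectionNotExistException:
        # The cached alias went away (e.g. the server closed the session):
        # re-create the connection under the same alias, then retry exactly once.
        reconnect()
        return collection.insert(rows, timeout=timeout, **kwargs)
```
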
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: bisheng-langchain
-Version: 0.3.4.dev3
+Version: 0.3.6.dev1
 Summary: bisheng langchain modules
 Home-page: https://github.com/dataelement/bisheng
 Author: DataElem
@@ -30,7 +30,7 @@ Requires-Dist: shapely==2.0.2
 Requires-Dist: filetype==1.2.0
 Requires-Dist: langgraph==0.0.50
 Requires-Dist: openai==1.14.3
-Requires-Dist: langchain-openai==0.1.0
+Requires-Dist: langchain-openai==0.1.5
 Requires-Dist: llama-index==0.9.48
 Requires-Dist: bisheng-ragas==1.0.0
 
@@ -1,5 +1,5 @@
 bisheng_langchain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-bisheng_langchain/text_splitter.py,sha256=8snY_Fojh-A1EEGXBSEqCh0N77KQc_dqsgeptAlf344,7934
+bisheng_langchain/text_splitter.py,sha256=yYpzMa0c1tRZNGyuFFJsLvSeqberYHV2-BR28pQim8I,8794
 bisheng_langchain/agents/__init__.py,sha256=ctsKj77fS8qlkhz_9sS_AhCjFvFNxEpJ9KBYVrApLRg,226
 bisheng_langchain/agents/chatglm_functions_agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/agents/chatglm_functions_agent/base.py,sha256=tyytq0XIFXpfxDP0s5QKeprKOunMqi1fHMfQ0-kOmDE,13674
@@ -23,7 +23,7 @@ bisheng_langchain/chains/conversational_retrieval/__init__.py,sha256=47DEQpj8HBS
 bisheng_langchain/chains/conversational_retrieval/base.py,sha256=XiqBqov6No-wTVCou6qyMT5p2JQgoQI7OLQOYH8XUos,5313
 bisheng_langchain/chains/qa_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/chains/qa_generation/base.py,sha256=VYGmLDB0bnlDQ6T8ivLP55wwFbMo9HOzlPEDUuRx5fU,4148
-bisheng_langchain/chains/qa_generation/base_v2.py,sha256=2F2kGe3ermJraQu4oC-m8vm_ENBy_Zi4uHrJDcSOeJw,15460
+bisheng_langchain/chains/qa_generation/base_v2.py,sha256=ZtHEuNFwbE9txCGR3wx0oDAoj9V6bAxi3GXF8Z78cqQ,14580
 bisheng_langchain/chains/qa_generation/prompt.py,sha256=4eJk9aDUYDN1qaaYRPy9EobCIncnwS8BbQaDFzzePtM,1944
 bisheng_langchain/chains/qa_generation/prompt_v2.py,sha256=sQLanA_iOnLqrUIwzfTOTANt-1vJ44CM54HFDU8Jo1Q,8938
 bisheng_langchain/chains/question_answering/__init__.py,sha256=_gOZMc-SWprK6xc-Jj64jcr9nc-G4YkZbEYwfJNq_bY,8795
@@ -54,7 +54,7 @@ bisheng_langchain/document_loaders/custom_kv.py,sha256=xWUPhcr1hjbdya4zgEHG4Fl0s
 bisheng_langchain/document_loaders/elem_html.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/document_loaders/elem_image.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/document_loaders/elem_pdf.py,sha256=K-TXILGNFLFjavhun_MFbUF4t2_WGA3Z-kbnr75lmW8,22243
-bisheng_langchain/document_loaders/elem_unstrcutured_loader.py,sha256=FtoyfmE85CwZuMvr52_bqcHQCgypKCWMGwLZrzgQYbY,7353
+bisheng_langchain/document_loaders/elem_unstrcutured_loader.py,sha256=JW87AhzCY_KS_YYszyxU3GgPjxP4vWOHDfifJEpP5CI,8055
 bisheng_langchain/document_loaders/universal_kv.py,sha256=ZdIgFIc2fH2kkvJNb7j2wi6FLS_PaaatVy6z_YNV2hw,4114
 bisheng_langchain/document_loaders/parsers/__init__.py,sha256=OOM_FJkwaU-zNS58fASw0TH8FNT6VXKb0VrvisgdrII,171
 bisheng_langchain/document_loaders/parsers/ellm_client.py,sha256=Y_CRYwBr-gFArOirF1b76KyI5N8eVpsLeDiIsKtYkpU,1641
@@ -93,7 +93,7 @@ bisheng_langchain/gpts/tools/api_tools/base.py,sha256=fWQSDIOVb4JZrtJ9ML9q2ycsAa
 bisheng_langchain/gpts/tools/api_tools/flow.py,sha256=ot2YAYgQGWgUpb2nCECAmpqHY6m0SgzwkupF9kDT3lU,2461
 bisheng_langchain/gpts/tools/api_tools/macro_data.py,sha256=FyG-qtl2ECS1CDKt6olN0eDTDM91d-UvDkMDBiVLgYQ,27429
 bisheng_langchain/gpts/tools/api_tools/openapi.py,sha256=CzKt9FRkgngBcWgabD4emPqAXkAgagkD-pMjG680MTE,3903
-bisheng_langchain/gpts/tools/api_tools/sina.py,sha256=GGA4ZYvNEpqBZ_l8MUYqgkI8xZe9XcGa9-KlHZVqr6I,9542
+bisheng_langchain/gpts/tools/api_tools/sina.py,sha256=4KpK7_HUUtjpdJ-K4LjPlb-occyAZcRtmmCWqJ2BotE,9708
 bisheng_langchain/gpts/tools/api_tools/tianyancha.py,sha256=abDAz-yAH1-2rKiSmZ6TgnrNUnpgAZpDY8oDiWfWapc,6684
 bisheng_langchain/gpts/tools/bing_search/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/gpts/tools/bing_search/tool.py,sha256=v_VlqcMplITA5go5qWA4qZ5p43E1-1s0bzmyY7H0hqY,1710
@@ -126,7 +126,7 @@ bisheng_langchain/rag/config/baseline_s2b_mix.yaml,sha256=rkPfzU2-mvjRrZ0zMHaQsn
 bisheng_langchain/rag/config/baseline_v2.yaml,sha256=RP-DwIRIS_ZK8ixbXi2Z28rKqHD56pWmr2o2WWIwq3Y,2382
 bisheng_langchain/rag/init_retrievers/__init__.py,sha256=qpLLAuqZPtumTlJj17Ie5AbDDmiUiDxYefg_pumqu-c,218
 bisheng_langchain/rag/init_retrievers/baseline_vector_retriever.py,sha256=oRKZZpxlLQAtsubIcAXeXpf1a9h6Pt6uOtNTLeD2jps,2362
-bisheng_langchain/rag/init_retrievers/keyword_retriever.py,sha256=Da4Q5BrfN0GckJaeAgPYMlzQAp9ll7ZGGyvs7OdCQ5c,2513
+bisheng_langchain/rag/init_retrievers/keyword_retriever.py,sha256=NRT0fBx6HFR7j9IbRl_NBuqF7hnL-9v5GCqHpgnrfPQ,2523
 bisheng_langchain/rag/init_retrievers/mix_retriever.py,sha256=Whxq4kjNPLsxnHcVo60usdFFwLTCD-1jO38q08LXkVQ,4653
 bisheng_langchain/rag/init_retrievers/smaller_chunks_retriever.py,sha256=RQ7QLEOOhBrkw-EimXVJqIGa96D-KkNDik2h9hzg9fU,3805
 bisheng_langchain/rag/prompts/__init__.py,sha256=IUCq9gzqGQN_6IDk0D_F5t3mOUI_KbmSzYnnXoX4VKE,223
@@ -150,10 +150,10 @@ bisheng_langchain/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG
 bisheng_langchain/utils/azure_dalle_image_generator.py,sha256=96-_nO4hDSwyPE4rSYop5SgJ-U9CE2un4bTdW0E5RGU,6582
 bisheng_langchain/utils/requests.py,sha256=vWGKyNTxApVeaVdKxqACfIT1Q8wMy-jC3kUv2Ce9Mzc,8688
 bisheng_langchain/vectorstores/__init__.py,sha256=zCZgDe7LyQ0iDkfcm5UJ5NxwKQSRHnqrsjx700Fy11M,213
-bisheng_langchain/vectorstores/elastic_keywords_search.py,sha256=Pm1rS50GJ0HWbjBsFDgs28SVuVbjGSRPOor6yJlnE7w,13347
-bisheng_langchain/vectorstores/milvus.py,sha256=8HHbIxoSbLYDFlFJSfmjLOfqGpOSZd24iVYWSYz3TX0,36637
+bisheng_langchain/vectorstores/elastic_keywords_search.py,sha256=inZarhahRaesrvLqyeRCMQvHGAASY53opEVA0_o8S14,14901
+bisheng_langchain/vectorstores/milvus.py,sha256=xh7NokraKg_Xc9ofz0RVfJ_I36ftnprLJtV-1NfaeyQ,37162
 bisheng_langchain/vectorstores/retriever.py,sha256=hj4nAAl352EV_ANnU2OHJn7omCH3nBK82ydo14KqMH4,4353
-bisheng_langchain-0.3.4.dev3.dist-info/METADATA,sha256=lPUBJcKemYQziXamiJz2lVpuYDlM0H-RBOUiH5sq_rc,2476
-bisheng_langchain-0.3.4.dev3.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-bisheng_langchain-0.3.4.dev3.dist-info/top_level.txt,sha256=Z6pPNyCo4ihyr9iqGQbH8sJiC4dAUwA_mAyGRQB5_Fs,18
-bisheng_langchain-0.3.4.dev3.dist-info/RECORD,,
+bisheng_langchain-0.3.6.dev1.dist-info/METADATA,sha256=KG32YRknnVoAxFzVKE_qMMQBjbhZen046fXQYyhXQvs,2476
+bisheng_langchain-0.3.6.dev1.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+bisheng_langchain-0.3.6.dev1.dist-info/top_level.txt,sha256=Z6pPNyCo4ihyr9iqGQbH8sJiC4dAUwA_mAyGRQB5_Fs,18
+bisheng_langchain-0.3.6.dev1.dist-info/RECORD,,