bisheng-langchain 0.3.7.dev1__py3-none-any.whl → 0.3.7.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -134,6 +134,8 @@ class TrainsetGenerator:
134
134
  chunk_size: int = 1024,
135
135
  seed: int = 42,
136
136
  prompt: Optional[ChatPromptTemplate] = SEED_QUESTION_CHAT_PROMPT,
137
+ filter_lowquality_context: bool = False,
138
+ filter_lowquality_question: bool = False,
137
139
  answer_prompt: Optional[HumanMessagePromptTemplate] = ANSWER_FORMULATE,
138
140
  ) -> None:
139
141
  self.generator_llm = generator_llm
@@ -152,6 +154,8 @@ class TrainsetGenerator:
152
154
  self.threshold = 5.0
153
155
  self.rng = default_rng(seed)
154
156
  self.prompt = prompt
157
+ self.filter_lowquality_context = filter_lowquality_context
158
+ self.filter_lowquality_question = filter_lowquality_question
155
159
  if answer_prompt is None:
156
160
  answer_prompt = ANSWER_FORMULATE
157
161
  self.answer_prompt = answer_prompt
@@ -163,6 +167,8 @@ class TrainsetGenerator:
163
167
  chunk_size: int = 512,
164
168
  trainset_distribution: dict = DEFAULT_TRAIN_DISTRIBUTION,
165
169
  prompt: Optional[ChatPromptTemplate] = SEED_QUESTION_CHAT_PROMPT,
170
+ filter_lowquality_context: bool = False,
171
+ filter_lowquality_question: bool = False,
166
172
  answer_prompt: Optional[PromptTemplate] = ANSWER_FORMULATE,
167
173
  ):
168
174
  generator_llm = llm
@@ -173,6 +179,8 @@ class TrainsetGenerator:
173
179
  chunk_size=chunk_size,
174
180
  trainset_distribution=trainset_distribution,
175
181
  prompt=prompt,
182
+ filter_lowquality_context=filter_lowquality_context,
183
+ filter_lowquality_question=filter_lowquality_question,
176
184
  answer_prompt=answer_prompt,
177
185
  )
178
186
 
@@ -316,14 +324,17 @@ class TrainsetGenerator:
316
324
  )
317
325
 
318
326
  text_chunk = " ".join([node.get_content() for node in nodes])
319
- score = self._filter_context(text_chunk)
320
- if not score:
321
- continue
327
+ if self.filter_lowquality_context:
328
+ score = self._filter_context(text_chunk)
329
+ if not score:
330
+ continue
322
331
  seed_question = self._seed_question(text_chunk)
323
332
 
324
333
  question = seed_question
325
- # is_valid_question = self._filter_question(question)
326
- is_valid_question = True
334
+ if self.filter_lowquality_question:
335
+ is_valid_question = self._filter_question(question)
336
+ else:
337
+ is_valid_question = True
327
338
  if is_valid_question:
328
339
  context = [text_chunk] * len(question.split("\n"))
329
340
  is_conv = len(context) > 1
@@ -361,6 +372,8 @@ class QAGenerationChainV2(Chain):
361
372
  llm: BaseLanguageModel,
362
373
  k: Optional[int] = None,
363
374
  chunk_size: int = 512,
375
+ filter_lowquality_context: bool = False,
376
+ filter_lowquality_question: bool = False,
364
377
  question_prompt: Optional[ChatPromptTemplate] = SEED_QUESTION_CHAT_PROMPT,
365
378
  answer_prompt: Optional[HumanMessagePromptTemplate] = ANSWER_FORMULATE,
366
379
  **kwargs: Any,
@@ -377,8 +390,14 @@ class QAGenerationChainV2(Chain):
377
390
  Returns:
378
391
  a QAGenerationChain class
379
392
  """
380
- generator = TrainsetGenerator.from_default(llm, chunk_size=chunk_size, prompt=question_prompt,
381
- answer_prompt=answer_prompt)
393
+ generator = TrainsetGenerator.from_default(
394
+ llm,
395
+ chunk_size=chunk_size,
396
+ prompt=question_prompt,
397
+ answer_prompt=answer_prompt,
398
+ filter_lowquality_context=filter_lowquality_context,
399
+ filter_lowquality_question=filter_lowquality_question
400
+ )
382
401
  return cls(documents=documents, generator=generator, k=k, **kwargs)
383
402
 
384
403
  @property
@@ -405,14 +424,14 @@ class QAGenerationChainV2(Chain):
405
424
  dataset = self.generator.generate(documents=self.documents, train_size=self.k)
406
425
  df = dataset.to_pandas()
407
426
  qa_pairs = df.to_dict("records")
408
- qa = ''
427
+ qa = []
409
428
  for pair in qa_pairs:
410
- qa += json.dumps(
411
- {
412
- "question": pair["question"],
413
- "answer": pair["ground_truth"][0],
414
- "context": pair["ground_truth_context"][0],
415
- }, ensure_ascii=False)
429
+ qa.append({
430
+ "question": pair["question"],
431
+ "answer": pair["ground_truth"][0],
432
+ "context": pair["ground_truth_context"][0],
433
+ })
434
+ qa = f'```json\n{json.dumps(qa, ensure_ascii=False, indent=4)}\n```'
416
435
  return {self.output_key: qa}
417
436
 
418
437
  async def _acall(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: bisheng-langchain
3
- Version: 0.3.7.dev1
3
+ Version: 0.3.7.dev2
4
4
  Summary: bisheng langchain modules
5
5
  Home-page: https://github.com/dataelement/bisheng
6
6
  Author: DataElem
@@ -23,7 +23,7 @@ bisheng_langchain/chains/conversational_retrieval/__init__.py,sha256=47DEQpj8HBS
23
23
  bisheng_langchain/chains/conversational_retrieval/base.py,sha256=XiqBqov6No-wTVCou6qyMT5p2JQgoQI7OLQOYH8XUos,5313
24
24
  bisheng_langchain/chains/qa_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
25
  bisheng_langchain/chains/qa_generation/base.py,sha256=VYGmLDB0bnlDQ6T8ivLP55wwFbMo9HOzlPEDUuRx5fU,4148
26
- bisheng_langchain/chains/qa_generation/base_v2.py,sha256=ZtHEuNFwbE9txCGR3wx0oDAoj9V6bAxi3GXF8Z78cqQ,14580
26
+ bisheng_langchain/chains/qa_generation/base_v2.py,sha256=2F2kGe3ermJraQu4oC-m8vm_ENBy_Zi4uHrJDcSOeJw,15460
27
27
  bisheng_langchain/chains/qa_generation/prompt.py,sha256=4eJk9aDUYDN1qaaYRPy9EobCIncnwS8BbQaDFzzePtM,1944
28
28
  bisheng_langchain/chains/qa_generation/prompt_v2.py,sha256=sQLanA_iOnLqrUIwzfTOTANt-1vJ44CM54HFDU8Jo1Q,8938
29
29
  bisheng_langchain/chains/question_answering/__init__.py,sha256=_gOZMc-SWprK6xc-Jj64jcr9nc-G4YkZbEYwfJNq_bY,8795
@@ -155,7 +155,7 @@ bisheng_langchain/vectorstores/__init__.py,sha256=zCZgDe7LyQ0iDkfcm5UJ5NxwKQSRHn
155
155
  bisheng_langchain/vectorstores/elastic_keywords_search.py,sha256=inZarhahRaesrvLqyeRCMQvHGAASY53opEVA0_o8S14,14901
156
156
  bisheng_langchain/vectorstores/milvus.py,sha256=xh7NokraKg_Xc9ofz0RVfJ_I36ftnprLJtV-1NfaeyQ,37162
157
157
  bisheng_langchain/vectorstores/retriever.py,sha256=hj4nAAl352EV_ANnU2OHJn7omCH3nBK82ydo14KqMH4,4353
158
- bisheng_langchain-0.3.7.dev1.dist-info/METADATA,sha256=PLzscscApd7BJDPDM4tQHtUUxaP9Rxjo-v1LYCLnTV0,2476
159
- bisheng_langchain-0.3.7.dev1.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
160
- bisheng_langchain-0.3.7.dev1.dist-info/top_level.txt,sha256=Z6pPNyCo4ihyr9iqGQbH8sJiC4dAUwA_mAyGRQB5_Fs,18
161
- bisheng_langchain-0.3.7.dev1.dist-info/RECORD,,
158
+ bisheng_langchain-0.3.7.dev2.dist-info/METADATA,sha256=rPLG8c2G8ZAOn3mjAcIP4evhXJbe-CMeUQc9gtuIdCc,2476
159
+ bisheng_langchain-0.3.7.dev2.dist-info/WHEEL,sha256=bFJAMchF8aTQGUgMZzHJyDDMPTO3ToJ7x23SLJa1SVo,92
160
+ bisheng_langchain-0.3.7.dev2.dist-info/top_level.txt,sha256=Z6pPNyCo4ihyr9iqGQbH8sJiC4dAUwA_mAyGRQB5_Fs,18
161
+ bisheng_langchain-0.3.7.dev2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.44.0)
2
+ Generator: bdist_wheel (0.45.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5