bisheng-langchain 0.3.7.dev1__py3-none-any.whl → 0.3.7.dev2__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- bisheng_langchain/chains/qa_generation/base_v2.py +33 -14
- {bisheng_langchain-0.3.7.dev1.dist-info → bisheng_langchain-0.3.7.dev2.dist-info}/METADATA +1 -1
- {bisheng_langchain-0.3.7.dev1.dist-info → bisheng_langchain-0.3.7.dev2.dist-info}/RECORD +5 -5
- {bisheng_langchain-0.3.7.dev1.dist-info → bisheng_langchain-0.3.7.dev2.dist-info}/WHEEL +1 -1
- {bisheng_langchain-0.3.7.dev1.dist-info → bisheng_langchain-0.3.7.dev2.dist-info}/top_level.txt +0 -0
@@ -134,6 +134,8 @@ class TrainsetGenerator:
|
|
134
134
|
chunk_size: int = 1024,
|
135
135
|
seed: int = 42,
|
136
136
|
prompt: Optional[ChatPromptTemplate] = SEED_QUESTION_CHAT_PROMPT,
|
137
|
+
filter_lowquality_context: bool = False,
|
138
|
+
filter_lowquality_question: bool = False,
|
137
139
|
answer_prompt: Optional[HumanMessagePromptTemplate] = ANSWER_FORMULATE,
|
138
140
|
) -> None:
|
139
141
|
self.generator_llm = generator_llm
|
@@ -152,6 +154,8 @@ class TrainsetGenerator:
|
|
152
154
|
self.threshold = 5.0
|
153
155
|
self.rng = default_rng(seed)
|
154
156
|
self.prompt = prompt
|
157
|
+
self.filter_lowquality_context = filter_lowquality_context
|
158
|
+
self.filter_lowquality_question = filter_lowquality_question
|
155
159
|
if answer_prompt is None:
|
156
160
|
answer_prompt = ANSWER_FORMULATE
|
157
161
|
self.answer_prompt = answer_prompt
|
@@ -163,6 +167,8 @@ class TrainsetGenerator:
|
|
163
167
|
chunk_size: int = 512,
|
164
168
|
trainset_distribution: dict = DEFAULT_TRAIN_DISTRIBUTION,
|
165
169
|
prompt: Optional[ChatPromptTemplate] = SEED_QUESTION_CHAT_PROMPT,
|
170
|
+
filter_lowquality_context: bool = False,
|
171
|
+
filter_lowquality_question: bool = False,
|
166
172
|
answer_prompt: Optional[PromptTemplate] = ANSWER_FORMULATE,
|
167
173
|
):
|
168
174
|
generator_llm = llm
|
@@ -173,6 +179,8 @@ class TrainsetGenerator:
|
|
173
179
|
chunk_size=chunk_size,
|
174
180
|
trainset_distribution=trainset_distribution,
|
175
181
|
prompt=prompt,
|
182
|
+
filter_lowquality_context=filter_lowquality_context,
|
183
|
+
filter_lowquality_question=filter_lowquality_question,
|
176
184
|
answer_prompt=answer_prompt,
|
177
185
|
)
|
178
186
|
|
@@ -316,14 +324,17 @@ class TrainsetGenerator:
|
|
316
324
|
)
|
317
325
|
|
318
326
|
text_chunk = " ".join([node.get_content() for node in nodes])
|
319
|
-
|
320
|
-
|
321
|
-
|
327
|
+
if self.filter_lowquality_context:
|
328
|
+
score = self._filter_context(text_chunk)
|
329
|
+
if not score:
|
330
|
+
continue
|
322
331
|
seed_question = self._seed_question(text_chunk)
|
323
332
|
|
324
333
|
question = seed_question
|
325
|
-
|
326
|
-
|
334
|
+
if self.filter_lowquality_question:
|
335
|
+
is_valid_question = self._filter_question(question)
|
336
|
+
else:
|
337
|
+
is_valid_question = True
|
327
338
|
if is_valid_question:
|
328
339
|
context = [text_chunk] * len(question.split("\n"))
|
329
340
|
is_conv = len(context) > 1
|
@@ -361,6 +372,8 @@ class QAGenerationChainV2(Chain):
|
|
361
372
|
llm: BaseLanguageModel,
|
362
373
|
k: Optional[int] = None,
|
363
374
|
chunk_size: int = 512,
|
375
|
+
filter_lowquality_context: bool = False,
|
376
|
+
filter_lowquality_question: bool = False,
|
364
377
|
question_prompt: Optional[ChatPromptTemplate] = SEED_QUESTION_CHAT_PROMPT,
|
365
378
|
answer_prompt: Optional[HumanMessagePromptTemplate] = ANSWER_FORMULATE,
|
366
379
|
**kwargs: Any,
|
@@ -377,8 +390,14 @@ class QAGenerationChainV2(Chain):
|
|
377
390
|
Returns:
|
378
391
|
a QAGenerationChain class
|
379
392
|
"""
|
380
|
-
generator = TrainsetGenerator.from_default(
|
381
|
-
|
393
|
+
generator = TrainsetGenerator.from_default(
|
394
|
+
llm,
|
395
|
+
chunk_size=chunk_size,
|
396
|
+
prompt=question_prompt,
|
397
|
+
answer_prompt=answer_prompt,
|
398
|
+
filter_lowquality_context=filter_lowquality_context,
|
399
|
+
filter_lowquality_question=filter_lowquality_question
|
400
|
+
)
|
382
401
|
return cls(documents=documents, generator=generator, k=k, **kwargs)
|
383
402
|
|
384
403
|
@property
|
@@ -405,14 +424,14 @@ class QAGenerationChainV2(Chain):
|
|
405
424
|
dataset = self.generator.generate(documents=self.documents, train_size=self.k)
|
406
425
|
df = dataset.to_pandas()
|
407
426
|
qa_pairs = df.to_dict("records")
|
408
|
-
qa =
|
427
|
+
qa = []
|
409
428
|
for pair in qa_pairs:
|
410
|
-
qa
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
429
|
+
qa.append({
|
430
|
+
"question": pair["question"],
|
431
|
+
"answer": pair["ground_truth"][0],
|
432
|
+
"context": pair["ground_truth_context"][0],
|
433
|
+
})
|
434
|
+
qa = f'```json\n{json.dumps(qa, ensure_ascii=False, indent=4)}\n```'
|
416
435
|
return {self.output_key: qa}
|
417
436
|
|
418
437
|
async def _acall(
|
@@ -23,7 +23,7 @@ bisheng_langchain/chains/conversational_retrieval/__init__.py,sha256=47DEQpj8HBS
|
|
23
23
|
bisheng_langchain/chains/conversational_retrieval/base.py,sha256=XiqBqov6No-wTVCou6qyMT5p2JQgoQI7OLQOYH8XUos,5313
|
24
24
|
bisheng_langchain/chains/qa_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
25
25
|
bisheng_langchain/chains/qa_generation/base.py,sha256=VYGmLDB0bnlDQ6T8ivLP55wwFbMo9HOzlPEDUuRx5fU,4148
|
26
|
-
bisheng_langchain/chains/qa_generation/base_v2.py,sha256=
|
26
|
+
bisheng_langchain/chains/qa_generation/base_v2.py,sha256=2F2kGe3ermJraQu4oC-m8vm_ENBy_Zi4uHrJDcSOeJw,15460
|
27
27
|
bisheng_langchain/chains/qa_generation/prompt.py,sha256=4eJk9aDUYDN1qaaYRPy9EobCIncnwS8BbQaDFzzePtM,1944
|
28
28
|
bisheng_langchain/chains/qa_generation/prompt_v2.py,sha256=sQLanA_iOnLqrUIwzfTOTANt-1vJ44CM54HFDU8Jo1Q,8938
|
29
29
|
bisheng_langchain/chains/question_answering/__init__.py,sha256=_gOZMc-SWprK6xc-Jj64jcr9nc-G4YkZbEYwfJNq_bY,8795
|
@@ -155,7 +155,7 @@ bisheng_langchain/vectorstores/__init__.py,sha256=zCZgDe7LyQ0iDkfcm5UJ5NxwKQSRHn
|
|
155
155
|
bisheng_langchain/vectorstores/elastic_keywords_search.py,sha256=inZarhahRaesrvLqyeRCMQvHGAASY53opEVA0_o8S14,14901
|
156
156
|
bisheng_langchain/vectorstores/milvus.py,sha256=xh7NokraKg_Xc9ofz0RVfJ_I36ftnprLJtV-1NfaeyQ,37162
|
157
157
|
bisheng_langchain/vectorstores/retriever.py,sha256=hj4nAAl352EV_ANnU2OHJn7omCH3nBK82ydo14KqMH4,4353
|
158
|
-
bisheng_langchain-0.3.7.
|
159
|
-
bisheng_langchain-0.3.7.
|
160
|
-
bisheng_langchain-0.3.7.
|
161
|
-
bisheng_langchain-0.3.7.
|
158
|
+
bisheng_langchain-0.3.7.dev2.dist-info/METADATA,sha256=rPLG8c2G8ZAOn3mjAcIP4evhXJbe-CMeUQc9gtuIdCc,2476
|
159
|
+
bisheng_langchain-0.3.7.dev2.dist-info/WHEEL,sha256=bFJAMchF8aTQGUgMZzHJyDDMPTO3ToJ7x23SLJa1SVo,92
|
160
|
+
bisheng_langchain-0.3.7.dev2.dist-info/top_level.txt,sha256=Z6pPNyCo4ihyr9iqGQbH8sJiC4dAUwA_mAyGRQB5_Fs,18
|
161
|
+
bisheng_langchain-0.3.7.dev2.dist-info/RECORD,,
|
{bisheng_langchain-0.3.7.dev1.dist-info → bisheng_langchain-0.3.7.dev2.dist-info}/top_level.txt
RENAMED
File without changes
|