bisheng-langchain 0.3.0rc0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- bisheng_langchain/chat_models/host_llm.py +1 -1
- bisheng_langchain/document_loaders/elem_unstrcutured_loader.py +5 -3
- bisheng_langchain/gpts/agent_types/llm_functions_agent.py +7 -1
- bisheng_langchain/gpts/assistant.py +8 -5
- bisheng_langchain/gpts/auto_optimization.py +28 -27
- bisheng_langchain/gpts/auto_tool_selected.py +14 -15
- bisheng_langchain/gpts/load_tools.py +53 -1
- bisheng_langchain/gpts/prompts/__init__.py +4 -2
- bisheng_langchain/gpts/prompts/assistant_prompt_base.py +1 -0
- bisheng_langchain/gpts/prompts/assistant_prompt_cohere.py +19 -0
- bisheng_langchain/gpts/prompts/opening_dialog_prompt.py +1 -1
- bisheng_langchain/gpts/tools/api_tools/__init__.py +1 -1
- bisheng_langchain/gpts/tools/api_tools/base.py +3 -3
- bisheng_langchain/gpts/tools/api_tools/flow.py +19 -7
- bisheng_langchain/gpts/tools/api_tools/macro_data.py +175 -4
- bisheng_langchain/gpts/tools/api_tools/openapi.py +101 -0
- bisheng_langchain/gpts/tools/api_tools/sina.py +2 -2
- bisheng_langchain/gpts/tools/code_interpreter/tool.py +118 -39
- bisheng_langchain/rag/__init__.py +5 -0
- bisheng_langchain/rag/bisheng_rag_pipeline.py +320 -0
- bisheng_langchain/rag/bisheng_rag_pipeline_v2.py +359 -0
- bisheng_langchain/rag/bisheng_rag_pipeline_v2_cohere_raw_prompting.py +376 -0
- bisheng_langchain/rag/bisheng_rag_tool.py +288 -0
- bisheng_langchain/rag/config/baseline.yaml +86 -0
- bisheng_langchain/rag/config/baseline_caibao.yaml +82 -0
- bisheng_langchain/rag/config/baseline_caibao_knowledge_v2.yaml +110 -0
- bisheng_langchain/rag/config/baseline_caibao_v2.yaml +112 -0
- bisheng_langchain/rag/config/baseline_demo_v2.yaml +92 -0
- bisheng_langchain/rag/config/baseline_s2b_mix.yaml +88 -0
- bisheng_langchain/rag/config/baseline_v2.yaml +90 -0
- bisheng_langchain/rag/extract_info.py +38 -0
- bisheng_langchain/rag/init_retrievers/__init__.py +4 -0
- bisheng_langchain/rag/init_retrievers/baseline_vector_retriever.py +61 -0
- bisheng_langchain/rag/init_retrievers/keyword_retriever.py +65 -0
- bisheng_langchain/rag/init_retrievers/mix_retriever.py +103 -0
- bisheng_langchain/rag/init_retrievers/smaller_chunks_retriever.py +92 -0
- bisheng_langchain/rag/prompts/__init__.py +9 -0
- bisheng_langchain/rag/prompts/extract_key_prompt.py +34 -0
- bisheng_langchain/rag/prompts/prompt.py +47 -0
- bisheng_langchain/rag/prompts/prompt_cohere.py +111 -0
- bisheng_langchain/rag/qa_corpus/__init__.py +0 -0
- bisheng_langchain/rag/qa_corpus/qa_generator.py +143 -0
- bisheng_langchain/rag/rerank/__init__.py +5 -0
- bisheng_langchain/rag/rerank/rerank.py +48 -0
- bisheng_langchain/rag/rerank/rerank_benchmark.py +139 -0
- bisheng_langchain/rag/run_qa_gen_web.py +47 -0
- bisheng_langchain/rag/run_rag_evaluate_web.py +55 -0
- bisheng_langchain/rag/scoring/__init__.py +0 -0
- bisheng_langchain/rag/scoring/llama_index_score.py +91 -0
- bisheng_langchain/rag/scoring/ragas_score.py +183 -0
- bisheng_langchain/rag/utils.py +181 -0
- bisheng_langchain/retrievers/ensemble.py +2 -1
- bisheng_langchain/vectorstores/elastic_keywords_search.py +2 -1
- {bisheng_langchain-0.3.0rc0.dist-info → bisheng_langchain-0.3.1.dist-info}/METADATA +1 -1
- {bisheng_langchain-0.3.0rc0.dist-info → bisheng_langchain-0.3.1.dist-info}/RECORD +57 -22
- bisheng_langchain/gpts/prompts/base_prompt.py +0 -1
- {bisheng_langchain-0.3.0rc0.dist-info → bisheng_langchain-0.3.1.dist-info}/WHEEL +0 -0
- {bisheng_langchain-0.3.0rc0.dist-info → bisheng_langchain-0.3.1.dist-info}/top_level.txt +0 -0
bisheng_langchain/rag/init_retrievers/baseline_vector_retriever.py
```diff
@@ -0,0 +1,61 @@
+import os
+import uuid
+from loguru import logger
+from typing import Any, Dict, Iterable, List, Optional
+
+from bisheng_langchain.vectorstores.milvus import Milvus
+from langchain_core.documents import Document
+from langchain_core.pydantic_v1 import Field
+from langchain_core.retrievers import BaseRetriever
+from langchain_core.vectorstores import VectorStore
+
+from langchain.callbacks.manager import CallbackManagerForRetrieverRun
+from langchain.text_splitter import TextSplitter
+
+
+class BaselineVectorRetriever(BaseRetriever):
+    vector_store: Milvus
+    text_splitter: TextSplitter
+    search_type: str = 'similarity'
+    search_kwargs: dict = Field(default_factory=dict)
+
+    def add_documents(
+        self,
+        documents: List[Document],
+        collection_name: str,
+        drop_old: bool = False,
+        **kwargs,
+    ) -> None:
+        split_docs = self.text_splitter.split_documents(documents)
+        logger.info(f"BaselineVectorRetriever: split document into {len(split_docs)} chunks")
+        for chunk_index, split_doc in enumerate(split_docs):
+            if 'chunk_bboxes' in split_doc.metadata:
+                split_doc.metadata.pop('chunk_bboxes')
+            split_doc.metadata['chunk_index'] = chunk_index
+            if kwargs.get('add_aux_info', False):
+                split_doc.page_content = split_doc.metadata["source"] + '\n' + split_doc.metadata["title"] + '\n' + split_doc.page_content
+
+        connection_args = self.vector_store.connection_args
+        embedding_function = self.vector_store.embedding_func
+        self.vector_store.from_documents(
+            split_docs,
+            embedding=embedding_function,
+            collection_name=collection_name,
+            connection_args=connection_args,
+            drop_old=drop_old,
+        )
+
+    def _get_relevant_documents(
+        self,
+        query: str,
+        collection_name: Optional[str] = None,
+    ) -> List[Document]:
+        if collection_name:
+            self.vector_store = self.vector_store.__class__(
+                collection_name=collection_name,
+                embedding_function=self.vector_store.embedding_func,
+                connection_args=self.vector_store.connection_args,
+            )
+        if self.search_type == 'similarity':
+            result = self.vector_store.similarity_search(query, **self.search_kwargs)
+            return result
```
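For orientation, a minimal usage sketch for this retriever. The Milvus address, the `FakeEmbeddings` stand-in, and the sample document are illustrative assumptions, not part of the package; the `Milvus` keyword arguments mirror the `__class__(...)` call in the diff above.

```python
# Hypothetical wiring -- assumes a reachable Milvus instance; FakeEmbeddings
# stands in for a real embedding model.
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import FakeEmbeddings
from langchain_core.documents import Document

from bisheng_langchain.rag.init_retrievers.baseline_vector_retriever import BaselineVectorRetriever
from bisheng_langchain.vectorstores.milvus import Milvus

vector_store = Milvus(
    embedding_function=FakeEmbeddings(size=768),
    connection_args={'host': '127.0.0.1', 'port': '19530'},
)
retriever = BaselineVectorRetriever(
    vector_store=vector_store,
    text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50),
    search_kwargs={'k': 4},
)
docs = [Document(page_content='...', metadata={'source': 'demo.txt', 'title': 'demo'})]
retriever.add_documents(docs, collection_name='rag_baseline')
# collection_name is forwarded as a keyword argument to _get_relevant_documents.
hits = retriever.get_relevant_documents('demo query', collection_name='rag_baseline')
```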
bisheng_langchain/rag/init_retrievers/keyword_retriever.py
```diff
@@ -0,0 +1,65 @@
+import os
+import uuid
+from loguru import logger
+from dataclasses import dataclass
+from typing import Any, Dict, Iterable, List, Optional
+
+from bisheng_langchain.vectorstores import ElasticKeywordsSearch
+from bisheng_langchain.vectorstores.milvus import Milvus
+from langchain_core.documents import Document
+from langchain_core.pydantic_v1 import Field
+from langchain_core.retrievers import BaseRetriever
+from langchain_core.vectorstores import VectorStore
+
+from langchain.callbacks.manager import CallbackManagerForRetrieverRun
+from langchain.text_splitter import TextSplitter
+
+
+class KeywordRetriever(BaseRetriever):
+    keyword_store: ElasticKeywordsSearch
+    text_splitter: TextSplitter
+    search_type: str = 'similarity'
+    search_kwargs: dict = Field(default_factory=dict)
+
+    def add_documents(
+        self,
+        documents: List[Document],
+        collection_name: str,
+        drop_old: bool = False,
+        **kwargs,
+    ) -> None:
+        split_docs = self.text_splitter.split_documents(documents)
+        logger.info(f"KeywordRetriever: split document into {len(split_docs)} chunks")
+        for chunk_index, split_doc in enumerate(split_docs):
+            if 'chunk_bboxes' in split_doc.metadata:
+                split_doc.metadata.pop('chunk_bboxes')
+            split_doc.metadata['chunk_index'] = chunk_index
+            if kwargs.get('add_aux_info', False):
+                split_doc.page_content = split_doc.metadata["source"] + '\n' + split_doc.metadata["title"] + '\n' + split_doc.page_content
+
+        elasticsearch_url = self.keyword_store.elasticsearch_url
+        ssl_verify = self.keyword_store.ssl_verify
+        self.keyword_store.from_documents(
+            split_docs,
+            embedding='',
+            index_name=collection_name,
+            elasticsearch_url=elasticsearch_url,
+            ssl_verify=ssl_verify,
+            drop_old=drop_old,
+        )
+
+    def _get_relevant_documents(
+        self,
+        query: str,
+        collection_name: Optional[str] = None,
+    ) -> List[Document]:
+        if collection_name:
+            self.keyword_store = self.keyword_store.__class__(
+                index_name=collection_name,
+                elasticsearch_url=self.keyword_store.elasticsearch_url,
+                ssl_verify=self.keyword_store.ssl_verify,
+                llm_chain=self.keyword_store.llm_chain
+            )
+        if self.search_type == 'similarity':
+            result = self.keyword_store.similarity_search(query, **self.search_kwargs)
+            return result
```
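A similar sketch for the keyword retriever against a local Elasticsearch. The URL and sample data are assumptions; the `ElasticKeywordsSearch` keyword arguments mirror the `__class__(...)` call the class makes above.

```python
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.documents import Document

from bisheng_langchain.rag.init_retrievers.keyword_retriever import KeywordRetriever
from bisheng_langchain.vectorstores import ElasticKeywordsSearch

# Hypothetical endpoint; same keyword arguments the class uses to re-instantiate itself.
keyword_store = ElasticKeywordsSearch(
    index_name='rag_keyword',
    elasticsearch_url='http://127.0.0.1:9200',
)
retriever = KeywordRetriever(
    keyword_store=keyword_store,
    text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50),
    search_kwargs={'k': 4},
)
docs = [Document(page_content='...', metadata={'source': 'demo.txt', 'title': 'demo'})]
retriever.add_documents(docs, collection_name='rag_keyword')
```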
bisheng_langchain/rag/init_retrievers/mix_retriever.py
```diff
@@ -0,0 +1,103 @@
+from typing import Any, Dict, Iterable, List, Optional
+
+from bisheng_langchain.vectorstores import ElasticKeywordsSearch
+from bisheng_langchain.vectorstores.milvus import Milvus
+from langchain_core.documents import Document
+from langchain_core.pydantic_v1 import Field
+from langchain_core.retrievers import BaseRetriever
+
+from langchain.schema import BaseRetriever, Document
+from langchain.text_splitter import TextSplitter
+
+
+class MixRetriever(BaseRetriever):
+    vector_store: Milvus
+    keyword_store: ElasticKeywordsSearch
+    vector_text_splitter: TextSplitter
+    keyword_text_splitter: TextSplitter
+    # retrieval
+    search_type: str = 'similarity'
+    vector_search_kwargs: dict = Field(default_factory=dict)
+    keyword_search_kwargs: dict = Field(default_factory=dict)
+    combine_strategy: str = 'mix' # "keyword_front, vector_front, mix"
+
+    def add_documents(
+        self,
+        documents: List[Document],
+        collection_name: str,
+        drop_old: bool = False,
+        **kwargs,
+    ) -> None:
+        vector_split_docs = self.vector_text_splitter.split_documents(documents)
+        for chunk_index, split_doc in enumerate(vector_split_docs):
+            if 'chunk_bboxes' in split_doc.metadata:
+                split_doc.metadata.pop('chunk_bboxes')
+            split_doc.metadata['chunk_index'] = chunk_index
+            if kwargs.get('add_aux_info', False):
+                split_doc.page_content = split_doc.metadata["source"] + '\n' + split_doc.metadata["title"] + '\n' + split_doc.page_content
+        keyword_split_docs = self.keyword_text_splitter.split_documents(documents)
+        for chunk_index, split_doc in enumerate(keyword_split_docs):
+            if 'chunk_bboxes' in split_doc.metadata:
+                split_doc.metadata.pop('chunk_bboxes')
+            split_doc.metadata['chunk_index'] = chunk_index
+            if kwargs.get('add_aux_info', False):
+                split_doc.page_content = split_doc.metadata["source"] + '\n' + split_doc.metadata["title"] + '\n' + split_doc.page_content
+
+        self.keyword_store.from_documents(
+            keyword_split_docs,
+            embedding='',
+            index_name=collection_name,
+            elasticsearch_url=self.keyword_store.elasticsearch_url,
+            ssl_verify=self.keyword_store.ssl_verify,
+            drop_old=drop_old,
+        )
+
+        self.vector_store.from_documents(
+            vector_split_docs,
+            embedding=self.vector_store.embedding_func,
+            collection_name=collection_name,
+            connection_args=self.vector_store.connection_args,
+            drop_old=drop_old,
+        )
+
+    def _get_relevant_documents(
+        self,
+        query: str,
+        collection_name: Optional[str] = None,
+    ) -> List[Document]:
+        if collection_name:
+            self.keyword_store = self.keyword_store.__class__(
+                index_name=collection_name,
+                elasticsearch_url=self.keyword_store.elasticsearch_url,
+                ssl_verify=self.keyword_store.ssl_verify,
+                llm_chain=self.keyword_store.llm_chain
+            )
+            self.vector_store = self.vector_store.__class__(
+                collection_name=collection_name,
+                embedding_function=self.vector_store.embedding_func,
+                connection_args=self.vector_store.connection_args,
+            )
+        if self.search_type == 'similarity':
+            keyword_docs = self.keyword_store.similarity_search(query, **self.keyword_search_kwargs)
+            vector_docs = self.vector_store.similarity_search(query, **self.vector_search_kwargs)
+            if self.combine_strategy == 'keyword_front':
+                return keyword_docs + vector_docs
+            elif self.combine_strategy == 'vector_front':
+                return vector_docs + keyword_docs
+            elif self.combine_strategy == 'mix':
+                combine_docs = []
+                min_len = min(len(keyword_docs), len(vector_docs))
+                for i in range(min_len):
+                    combine_docs.append(keyword_docs[i])
+                    combine_docs.append(vector_docs[i])
+                combine_docs.extend(keyword_docs[min_len:])
+                combine_docs.extend(vector_docs[min_len:])
+                return combine_docs
+            else:
+                raise ValueError(
+                    f'Expected combine_strategy to be one of '
+                    f'(keyword_front, vector_front, mix),'
+                    f'instead found {self.combine_strategy}'
+                )
+        else:
+            raise ValueError(f'Expected search_type to be one of (similarity), instead found {self.search_type}')
```
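The `mix` strategy interleaves keyword and vector hits one-for-one, then appends the tail of whichever list is longer. A self-contained illustration of the merge order, with plain strings standing in for `Document` objects:

```python
keyword_docs = ['k1', 'k2', 'k3']
vector_docs = ['v1', 'v2']

# Same interleaving logic as MixRetriever._get_relevant_documents above.
combine_docs = []
min_len = min(len(keyword_docs), len(vector_docs))
for i in range(min_len):
    combine_docs.append(keyword_docs[i])
    combine_docs.append(vector_docs[i])
combine_docs.extend(keyword_docs[min_len:])  # leftover keyword hits
combine_docs.extend(vector_docs[min_len:])   # leftover vector hits

print(combine_docs)  # ['k1', 'v1', 'k2', 'v2', 'k3']
```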
bisheng_langchain/rag/init_retrievers/smaller_chunks_retriever.py
```diff
@@ -0,0 +1,92 @@
+import uuid
+from typing import List, Optional
+
+from bisheng_langchain.vectorstores.milvus import Milvus
+from langchain_core.documents import Document
+from langchain_core.pydantic_v1 import Field
+from langchain_core.retrievers import BaseRetriever
+
+from langchain.text_splitter import TextSplitter
+
+
+class SmallerChunksVectorRetriever(BaseRetriever):
+    vector_store: Milvus
+    child_search_kwargs: dict = Field(default_factory=dict)
+    """Keyword arguments to pass to the search function."""
+    child_splitter: TextSplitter
+    parent_splitter: Optional[TextSplitter] = None
+    """The text splitter to use to create parent documents.
+    If none, then the parent documents will be the raw documents passed in."""
+    id_key = 'doc_id'
+
+    def add_documents(
+        self,
+        documents: List[Document],
+        collection_name: str,
+        drop_old: bool = False,
+        **kwargs,
+    ) -> None:
+        if self.parent_splitter is not None:
+            documents = self.parent_splitter.split_documents(documents)
+        for chunk_index, split_doc in enumerate(documents):
+            if 'chunk_bboxes' in split_doc.metadata:
+                split_doc.metadata.pop('chunk_bboxes')
+            split_doc.metadata['chunk_index'] = chunk_index
+            if kwargs.get('add_aux_info', False):
+                split_doc.page_content = split_doc.metadata["source"] + '\n' + split_doc.metadata["title"] + '\n' + split_doc.page_content
+        doc_ids = [str(uuid.uuid4()) for _ in documents]
+
+        par_docs = []
+        child_docs = []
+        for i, par_doc in enumerate(documents):
+            _id = doc_ids[i]
+            par_doc.metadata[self.id_key] = _id
+            sub_docs = self.child_splitter.split_documents([par_doc])
+            for _doc in sub_docs:
+                _doc.metadata[self.id_key] = _id
+                if kwargs.get('add_aux_info', False):
+                    _doc.page_content = _doc.metadata["source"] + '\n' + _doc.metadata["title"] + '\n' + _doc.page_content
+            par_docs.append(par_doc)
+            child_docs.extend(sub_docs)
+
+        self.vector_store.from_documents(
+            par_docs,
+            embedding=self.vector_store.embedding_func,
+            collection_name=collection_name + 'parent',
+            connection_args=self.vector_store.connection_args,
+            drop_old=drop_old,
+            no_embedding=True,
+        )
+        self.vector_store.from_documents(
+            child_docs,
+            embedding=self.vector_store.embedding_func,
+            collection_name=collection_name + 'child',
+            connection_args=self.vector_store.connection_args,
+            drop_old=drop_old,
+        )
+
+    def _get_relevant_documents(
+        self,
+        query: str,
+        collection_name: Optional[str] = None,
+    ) -> List[Document]:
+        if collection_name:
+            child_vectorstore = self.vector_store.__class__(
+                collection_name=collection_name + 'child',
+                embedding_function=self.vector_store.embedding_func,
+                connection_args=self.vector_store.connection_args,
+            )
+            parent_vectorstore = self.vector_store.__class__(
+                collection_name=collection_name + 'parent',
+                embedding_function=self.vector_store.embedding_func,
+                connection_args=self.vector_store.connection_args,
+            )
+        sub_docs = child_vectorstore.similarity_search(query, **self.child_search_kwargs)
+        doc_ids, ret = [], []
+        for doc in sub_docs:
+            doc_id = doc.metadata[self.id_key]
+            if doc_id not in doc_ids:
+                doc_ids.append(doc_id)
+                par_doc = parent_vectorstore.query(expr=f'{self.id_key} == "{doc_id}"')
+                ret.extend(par_doc)
+        return ret
```
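This retriever implements a parent/child ("small-to-big") scheme: fine-grained child chunks are embedded into a `<collection>child` Milvus collection for search, while their parents sit in `<collection>parent` and are fetched back through the shared `doc_id`. A hypothetical setup, with the Milvus address, embeddings, splitter sizes, and document as placeholders:

```python
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import FakeEmbeddings
from langchain_core.documents import Document

from bisheng_langchain.rag.init_retrievers.smaller_chunks_retriever import SmallerChunksVectorRetriever
from bisheng_langchain.vectorstores.milvus import Milvus

vector_store = Milvus(
    embedding_function=FakeEmbeddings(size=768),  # placeholder embedding model
    connection_args={'host': '127.0.0.1', 'port': '19530'},
)
retriever = SmallerChunksVectorRetriever(
    vector_store=vector_store,
    parent_splitter=RecursiveCharacterTextSplitter(chunk_size=1000),
    child_splitter=RecursiveCharacterTextSplitter(chunk_size=200),
    child_search_kwargs={'k': 8},
)
docs = [Document(page_content='...', metadata={'source': 'demo.txt', 'title': 'demo'})]
retriever.add_documents(docs, collection_name='rag_demo')
# Search runs over the 'child' collection; deduplicated parents are returned.
hits = retriever.get_relevant_documents('demo query', collection_name='rag_demo')
```

Note that `_get_relevant_documents` only builds the child and parent stores inside the `if collection_name:` branch, so a `collection_name` must be supplied at query time.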
bisheng_langchain/rag/prompts/extract_key_prompt.py
```diff
@@ -0,0 +1,34 @@
+from langchain.prompts.prompt import PromptTemplate
+
+
+EXTRACT_KEY_PROMPT = PromptTemplate(
+    input_variables=['question'],
+    template="""分析给定Question,提取Question中包含的KeyWords,输出列表形式
+
+Examples:
+Question: 达梦公司在过去三年中的流动比率如下:2021年:3.74倍;2020年:2.82倍;2019年:2.05倍。
+KeyWords: ['过去三年', '流动比率', '2021', '3.74', '2020', '2.82', '2019', '2.05']
+
+----------------
+Question: {question}
+KeyWords: """,
+)
+
+# EXTRACT_KEY_PROMPT = PromptTemplate(
+#     input_variables=['question'],
+#     template="""分析给定Question,提取Question中包含的KeyWords,输出列表形式
+
+# Examples:
+# Question: 能否根据2020年金宇生物技术股份有限公司的年报,给我简要介绍一下报告期内公司的社会责任工作情况?
+# KeyWords: ['报告期', '社会责任', '工作情况']
+
+# Question: 请根据江化微2019年的年报,简要介绍报告期内公司主要销售客户的客户集中度情况,并结合同行业情况进行分析。
+# KeyWords: ['报告期', '主要', '销售客户', '客户集中度', '同行业', '分析']
+
+# Question: 请问,在苏州迈为科技股份有限公司2019年的年报中,现金流的情况是否发生了重大变化?若发生,导致重大变化的原因是什么?
+# KeyWords: ['现金流', '重大变化', '原因']
+
+# ----------------
+# Question: {question}
+# KeyWords: """,
+# )
```
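Rendering the keyword-extraction template needs only `format`; the question below is an illustrative placeholder:

```python
from bisheng_langchain.rag.prompts.extract_key_prompt import EXTRACT_KEY_PROMPT

# Produces the few-shot prompt with the question slotted into the template.
print(EXTRACT_KEY_PROMPT.format(question='What was the current ratio in 2021?'))
```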
bisheng_langchain/rag/prompts/prompt.py
```diff
@@ -0,0 +1,47 @@
+from langchain_core.prompts import PromptTemplate
+from langchain_core.prompts.chat import (
+    ChatPromptTemplate,
+    HumanMessagePromptTemplate,
+    SystemMessagePromptTemplate,
+)
+
+
+prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
+
+{context}
+
+Question: {question}
+Helpful Answer:"""
+BASE_PROMPT = PromptTemplate(
+    template=prompt_template, input_variables=["context", "question"]
+)
+
+
+system_template = """Use the following pieces of context to answer the user's question.
+If you don't know the answer, just say that you don't know, don't try to make up an answer.
+----------------
+{context}"""
+messages = [
+    SystemMessagePromptTemplate.from_template(system_template),
+    HumanMessagePromptTemplate.from_template("{question}"),
+]
+CHAT_PROMPT = ChatPromptTemplate.from_messages(messages)
+
+
+system_template_general = """你是一个准确且可靠的知识库问答助手,能够借助上下文知识回答问题。你需要根据以下的规则来回答问题:
+1. 如果上下文中包含了正确答案,你需要根据上下文进行准确的回答。但是在回答前,你需要注意,上下文中的信息可能也包含存在事实性错误,如果文档中存在和事实不一致的错误,请根据事实回答。
+2. 如果上下文中不包含答案,就说你不知道,不要试图编造答案。
+3. 你需要根据上下文给出详细的回答,不要试图偷懒,不要遗漏括号中的信息,你必须回答的尽可能详细。
+"""
+human_template_general = """
+上下文:
+{context}
+
+问题:
+{question}
+"""
+messages_general = [
+    SystemMessagePromptTemplate.from_template(system_template_general),
+    HumanMessagePromptTemplate.from_template(human_template_general),
+]
+CHAT_PROMPT_GENERAL = ChatPromptTemplate.from_messages(messages_general)
```
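A quick sketch of how these templates render; the context and question values are placeholders:

```python
from bisheng_langchain.rag.prompts.prompt import BASE_PROMPT, CHAT_PROMPT, CHAT_PROMPT_GENERAL

# Plain-string prompt vs. chat-message prompts built from the same slots.
text = BASE_PROMPT.format(context='retrieved chunks...', question='the question')
messages = CHAT_PROMPT.format_messages(context='retrieved chunks...', question='the question')
messages_general = CHAT_PROMPT_GENERAL.format_messages(context='retrieved chunks...', question='the question')
```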
bisheng_langchain/rag/prompts/prompt_cohere.py
```diff
@@ -0,0 +1,111 @@
+from langchain_core.prompts import PromptTemplate
+from langchain_core.prompts.chat import (
+    ChatPromptTemplate,
+    HumanMessagePromptTemplate,
+    SystemMessagePromptTemplate,
+)
+
+
+pirate_preamble = """
+## Task & Context
+你是一个准确且可靠的知识库问答助手,能够借助上下文知识回答用户的问题。
+
+## Style Guide
+你需要遵循以下的规则来回答用户问题:
+- 如果给定的上下文中没有找到相关的答案,就说你不知道,不要试图编造答案;
+- 回答的语言跟用户的问题语言保持一致,比如用户的问题是中文,请用中文作答;用户的问题是英文,请用英文作答;
+- 你需要根据上下文给出尽可能详细的回答;
+"""
+
+messages = [
+    SystemMessagePromptTemplate.from_template(pirate_preamble),
+    HumanMessagePromptTemplate.from_template("{question}"),
+]
+COHERE_CHAT_PROMPT = ChatPromptTemplate.from_messages(messages)
+
+
+# system_content = """## Task & Context
+# 你是一个准确且可靠的知识库问答助手,能够借助上下文知识回答用户的问题。
+
+# ## Style Guide
+# 你需要遵循以下的规则来回答用户问题:
+# - 如果给定的上下文中没有找到相关的答案,就说你不知道,不要试图编造答案;
+# - 回答的语言跟用户的问题语言保持一致,比如用户的问题是中文,请用中文作答;用户的问题是英文,请用英文作答;
+# - 你需要根据上下文给出尽可能详细的回答;|<instruct>|Carefully perform the following instructions, in order, starting each with a new line.
+# Firstly, Decide which of the retrieved documents are relevant to the user's last input by writing 'Relevant Documents:' followed by comma-separated list of document numbers. If none are relevant, you should instead write 'None'.
+# Secondly, Decide which of the retrieved documents contain facts that should be cited in a good answer to the user's last input by writing 'Cited Documents:' followed a comma-separated list of document numbers. If you dont want to cite any of them, you should instead write 'None'.
+# Thirdly, Write 'Answer:' followed by a response to the user's last input. Use the retrieved documents to help you. Do not insert any citations or grounding markup.
+# Finally, Write 'Grounded answer:' followed by a response to the user's last input. Use the symbols <co: doc> and </co: doc> to indicate when a fact comes from a document in the search result, e.g <co: 0>my fact</co: 0> for a fact from document 0.|<documents>|{documents}"""
+
+system_content = """## Task & Context
+你是一个准确且可靠的知识库问答助手,能够借助上下文知识回答用户的问题。
+
+## Style Guide
+你需要遵循以下的规则来回答用户问题:
+- 如果给定的上下文中没有找到相关的答案,就说你不知道,不要试图编造答案;
+- 回答的语言跟用户的问题语言保持一致,比如用户的问题是中文,请用中文作答;用户的问题是英文,请用英文作答;
+- 你需要根据上下文给出尽可能详细的回答;|<instruct>|Carefully perform the following instructions, in order, starting each with a new line.
+Write 'Answer:' followed by a response to the user's last input. Use the retrieved documents to help you. Do not insert any citations or grounding markup.|<documents>|{documents}"""
+messages = [
+    SystemMessagePromptTemplate.from_template(system_content),
+    HumanMessagePromptTemplate.from_template("{question}"),
+]
+COHERE_LOCAL_CHAT_PROMPT_RAG = ChatPromptTemplate.from_messages(messages)
+
+
+# prompt_template = """<BOS_TOKEN><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble
+# The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral.
+
+# # System Preamble
+# ## Basic Rules
+# You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions.
+
+# # User Preamble
+# ## Task & Context
+# 你是一个准确且可靠的知识库问答助手,能够借助上下文知识回答用户的问题。
+
+# ## Style Guide
+# 你需要遵循以下的规则来回答用户问题:
+# - 如果给定的上下文中没有找到相关的答案,就说你不知道,不要试图编造答案;
+# - 回答的语言跟用户的问题语言保持一致,比如用户的问题是中文,请用中文作答;用户的问题是英文,请用英文作答;
+# - 你需要根据上下文给出尽可能详细的回答;<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{question}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><results>
+# {documents}
+# </results><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Carefully perform the following instructions, in order, starting each with a new line.
+# Write 'Answer:' followed by a response to the user's last input. Use the retrieved documents to help you. Do not insert any citations or grounding markup.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"""
+
+
+prompt_template = """<BOS_TOKEN><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble
+The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral.
+
+# System Preamble
+## Basic Rules
+You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate.
+
+# User Preamble
+## Task & Context
+你是一个准确且可靠的知识库问答助手,能够借助上下文知识回答用户的问题。
+
+## Style Guide
+Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{question}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><results>
+{documents}
+</results><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Carefully perform the following instructions, in order, starting each with a new line.
+- 回答的语言跟用户的问题语言保持一致,比如用户的问题是中文,请用中文作答;用户的问题是英文,请用英文作答;
+- 你需要根据上下文给出尽可能详细的回答;
+- Write 'Answer:' followed by a response to the user's last input. Use the retrieved documents to help you. <|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"""
+COHERE_RAW_PROMPT = PromptTemplate(
+    template=prompt_template, input_variables=["documents", "question"]
+)
+
+
+# prompt_template = """<BOS_TOKEN><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Use the following pieces of context to answer the question at the end.
+# If you don't know the answer, just say that you don't know, don't try to make up an answer.<results>
+# {documents}
+# </results><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{question}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"""
+
+# # prompt_template = """<BOS_TOKEN><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Use the following pieces of context to answer the question at the end.
+# # If you don't know the answer, just say that you don't know, don't try to make up an answer.
+# # ----------------
+# # {documents}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{question}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"""
+# COHERE_RAW_PROMPT = PromptTemplate(
+#     template=prompt_template, input_variables=["documents", "question"]
+# )
```
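Because `COHERE_RAW_PROMPT` embeds Cohere's turn tokens directly in a plain string template, it renders with `format` rather than as chat messages; both values below are placeholders:

```python
from bisheng_langchain.rag.prompts.prompt_cohere import COHERE_RAW_PROMPT

# Slots the serialized retrieved documents and the question into the raw template.
rendered = COHERE_RAW_PROMPT.format(
    documents='Document: 0\ntitle: demo\ntext: ...',
    question='the question',
)
```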