bisheng-langchain 0.3.0rc0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff compares publicly available package versions as released to their public registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in those registries.
Files changed (58)
  1. bisheng_langchain/chat_models/host_llm.py +1 -1
  2. bisheng_langchain/document_loaders/elem_unstrcutured_loader.py +5 -3
  3. bisheng_langchain/gpts/agent_types/llm_functions_agent.py +7 -1
  4. bisheng_langchain/gpts/assistant.py +8 -5
  5. bisheng_langchain/gpts/auto_optimization.py +28 -27
  6. bisheng_langchain/gpts/auto_tool_selected.py +14 -15
  7. bisheng_langchain/gpts/load_tools.py +53 -1
  8. bisheng_langchain/gpts/prompts/__init__.py +4 -2
  9. bisheng_langchain/gpts/prompts/assistant_prompt_base.py +1 -0
  10. bisheng_langchain/gpts/prompts/assistant_prompt_cohere.py +19 -0
  11. bisheng_langchain/gpts/prompts/opening_dialog_prompt.py +1 -1
  12. bisheng_langchain/gpts/tools/api_tools/__init__.py +1 -1
  13. bisheng_langchain/gpts/tools/api_tools/base.py +3 -3
  14. bisheng_langchain/gpts/tools/api_tools/flow.py +19 -7
  15. bisheng_langchain/gpts/tools/api_tools/macro_data.py +175 -4
  16. bisheng_langchain/gpts/tools/api_tools/openapi.py +101 -0
  17. bisheng_langchain/gpts/tools/api_tools/sina.py +2 -2
  18. bisheng_langchain/gpts/tools/code_interpreter/tool.py +118 -39
  19. bisheng_langchain/rag/__init__.py +5 -0
  20. bisheng_langchain/rag/bisheng_rag_pipeline.py +320 -0
  21. bisheng_langchain/rag/bisheng_rag_pipeline_v2.py +359 -0
  22. bisheng_langchain/rag/bisheng_rag_pipeline_v2_cohere_raw_prompting.py +376 -0
  23. bisheng_langchain/rag/bisheng_rag_tool.py +288 -0
  24. bisheng_langchain/rag/config/baseline.yaml +86 -0
  25. bisheng_langchain/rag/config/baseline_caibao.yaml +82 -0
  26. bisheng_langchain/rag/config/baseline_caibao_knowledge_v2.yaml +110 -0
  27. bisheng_langchain/rag/config/baseline_caibao_v2.yaml +112 -0
  28. bisheng_langchain/rag/config/baseline_demo_v2.yaml +92 -0
  29. bisheng_langchain/rag/config/baseline_s2b_mix.yaml +88 -0
  30. bisheng_langchain/rag/config/baseline_v2.yaml +90 -0
  31. bisheng_langchain/rag/extract_info.py +38 -0
  32. bisheng_langchain/rag/init_retrievers/__init__.py +4 -0
  33. bisheng_langchain/rag/init_retrievers/baseline_vector_retriever.py +61 -0
  34. bisheng_langchain/rag/init_retrievers/keyword_retriever.py +65 -0
  35. bisheng_langchain/rag/init_retrievers/mix_retriever.py +103 -0
  36. bisheng_langchain/rag/init_retrievers/smaller_chunks_retriever.py +92 -0
  37. bisheng_langchain/rag/prompts/__init__.py +9 -0
  38. bisheng_langchain/rag/prompts/extract_key_prompt.py +34 -0
  39. bisheng_langchain/rag/prompts/prompt.py +47 -0
  40. bisheng_langchain/rag/prompts/prompt_cohere.py +111 -0
  41. bisheng_langchain/rag/qa_corpus/__init__.py +0 -0
  42. bisheng_langchain/rag/qa_corpus/qa_generator.py +143 -0
  43. bisheng_langchain/rag/rerank/__init__.py +5 -0
  44. bisheng_langchain/rag/rerank/rerank.py +48 -0
  45. bisheng_langchain/rag/rerank/rerank_benchmark.py +139 -0
  46. bisheng_langchain/rag/run_qa_gen_web.py +47 -0
  47. bisheng_langchain/rag/run_rag_evaluate_web.py +55 -0
  48. bisheng_langchain/rag/scoring/__init__.py +0 -0
  49. bisheng_langchain/rag/scoring/llama_index_score.py +91 -0
  50. bisheng_langchain/rag/scoring/ragas_score.py +183 -0
  51. bisheng_langchain/rag/utils.py +181 -0
  52. bisheng_langchain/retrievers/ensemble.py +2 -1
  53. bisheng_langchain/vectorstores/elastic_keywords_search.py +2 -1
  54. {bisheng_langchain-0.3.0rc0.dist-info → bisheng_langchain-0.3.1.dist-info}/METADATA +1 -1
  55. {bisheng_langchain-0.3.0rc0.dist-info → bisheng_langchain-0.3.1.dist-info}/RECORD +57 -22
  56. bisheng_langchain/gpts/prompts/base_prompt.py +0 -1
  57. {bisheng_langchain-0.3.0rc0.dist-info → bisheng_langchain-0.3.1.dist-info}/WHEEL +0 -0
  58. {bisheng_langchain-0.3.0rc0.dist-info → bisheng_langchain-0.3.1.dist-info}/top_level.txt +0 -0
bisheng_langchain/rag/config/baseline.yaml
@@ -0,0 +1,86 @@
+ data:
+   origin_file_path: '/home/public/rag_benchmark_v1.0/rag_benchmark_processed'
+   question: '/home/gulixin/workspace/llm/bisheng/src/bisheng-langchain/experimental/rag/data/questions_info_with_answer_sample.xlsx'
+   save_answer: '/home/gulixin/workspace/llm/bisheng/src/bisheng-langchain/experimental/rag/data/questions_info_with_answer_sample_gpt4_12chunk_test.xlsx'
+
+ milvus:
+   host: '192.168.106.116'
+   port: '19530'
+   drop_old: True
+
+ elasticsearch:
+   url: 'http://192.168.106.116:9200'
+   ssl_verify:
+     basic_auth: ["elastic", "oSGL-zVvZ5P3Tm7qkDLC"]
+   drop_old: True
+
+ embedding:
+   type: 'OpenAIEmbeddings'
+   model: 'text-embedding-ada-002'
+   openai_api_key: ''
+   openai_proxy: 'http://118.195.232.223:39995'
+
+ chat_llm:
+   type: 'ChatOpenAI'
+   model: 'gpt-4-1106-preview'
+   openai_api_key: ''
+   openai_proxy: 'http://118.195.232.223:39995'
+   temperature: 0.0
+
+ # chat_llm:
+ # type: 'ChatQWen'
+ # model_name: 'qwen1.5-72b-chat'
+ # api_key: ''
+ # temperature: 0.01
+
+ loader:
+   type: 'ElemUnstructuredLoader'
+   unstructured_api_url: 'http://192.168.106.12:10001/v1/etl4llm/predict'
+
+ retriever:
+   type: 'EnsembleRetriever' # 不动
+   suffix: 'test_mix'
+   retrievers:
+     - type: 'MixRetriever'
+       splitter:
+         vector_text_splitter:
+           type: 'ElemCharacterTextSplitter'
+           chunk_size: 500
+           chunk_overlap: 50
+           separators: ["\n\n", "\n", " ", ""]
+         keyword_text_splitter:
+           type: 'ElemCharacterTextSplitter'
+           chunk_size: 500
+           chunk_overlap: 50
+           separators: ["\n\n", "\n", " ", ""]
+       retrieval:
+         combine_strategy: 'mix'
+         search_type: 'similarity'
+         vector_search_kwargs:
+           k: 6
+         keyword_search_kwargs:
+           k: 6
+
+ post_retrieval:
+   delete_duplicate: False
+   with_rank: False
+   rerank:
+     type: 'CustomReranker'
+     model_path: '/home/public/llm/bge-reranker-large'
+     device_id: 'cuda:0'
+     threshold: 0.0
+
+ generate:
+   with_retrieval: True
+   chain_type: 'stuff'
+   # prompt_type: 'BASE_PROMPT'
+   prompt_type: 'CHAT_PROMPT'
+
+ metric:
+   type: 'bisheng-ragas'
+   question_column: '问题'
+   gt_column: 'GT'
+   answer_column: 'rag_answer'
+   query_type_column: '问题类型'
+   metrics: ['answer_correctness_bisheng']
+   batch_size: 5
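
These YAML files configure the new bisheng_langchain/rag evaluation pipeline listed above; the loading code itself (bisheng_rag_pipeline.py and friends) is not reproduced in this diff. Purely as an illustrative sketch, the embedding and chat_llm blocks map onto stock LangChain objects roughly like this (the file path and variable names below are placeholders, not the package's actual API):

import yaml
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings

with open('baseline.yaml') as f:   # placeholder path
    cfg = yaml.safe_load(f)

# embedding block -> OpenAIEmbeddings; the api key is left blank in the shipped config
embeddings = OpenAIEmbeddings(
    model=cfg['embedding']['model'],
    openai_api_key=cfg['embedding']['openai_api_key'] or None,
    openai_proxy=cfg['embedding']['openai_proxy'] or None,
)

# chat_llm block -> ChatOpenAI
llm = ChatOpenAI(
    model=cfg['chat_llm']['model'],
    openai_api_key=cfg['chat_llm']['openai_api_key'] or None,
    openai_proxy=cfg['chat_llm']['openai_proxy'] or None,
    temperature=cfg['chat_llm']['temperature'],
)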
bisheng_langchain/rag/config/baseline_caibao.yaml
@@ -0,0 +1,82 @@
+ data:
+   origin_file_path: '/home/public/rag_benchmark_v1.0/rag_benchmark_processed'
+   question: '/home/gulixin/workspace/llm/bisheng/src/bisheng-langchain/experimental/rag/data/questions_info_with_answer_sample.xlsx'
+   save_answer: '/home/gulixin/workspace/llm/bisheng/src/bisheng-langchain/experimental/rag/data/finance_report_data_100_single_gpt3.5_20chunk.xlsx'
+
+ milvus:
+   host: '110.16.193.170'
+   port: '50062'
+   drop_old: True
+
+ elasticsearch:
+   url: 'http://110.16.193.170:50062/es'
+   ssl_verify:
+     basic_auth: ["elastic", "oSGL-zVvZ5P3Tm7qkDLC"]
+   drop_old: True
+
+ embedding:
+   type: 'OpenAIEmbeddings'
+   model: 'text-embedding-ada-002'
+   openai_api_key: ''
+   openai_proxy: ''
+
+ chat_llm:
+   type: 'ChatOpenAI'
+   model: 'gpt-4-1106-preview'
+   openai_api_key: ''
+   openai_proxy: ''
+   temperature: 0.0
+
+ loader:
+   type: 'ElemUnstructuredLoader'
+   unstructured_api_url: 'http://bisheng.dataelem.com/v1/etl4llm/predict'
+
+ retriever:
+   type: 'EnsembleRetriever' # 不动
+   suffix: 'test_mix'
+   retrievers:
+     - type: 'MixRetriever'
+       splitter:
+         vector_text_splitter:
+           type: 'ElemCharacterTextSplitter'
+           chunk_size: 500
+           chunk_overlap: 50
+           separators: ["\n\n", "\n", " ", ""]
+         keyword_text_splitter:
+           type: 'ElemCharacterTextSplitter'
+           chunk_size: 500
+           chunk_overlap: 50
+           separators: ["\n\n", "\n", " ", ""]
+       retrieval:
+         combine_strategy: 'mix'
+         search_type: 'similarity'
+         vector_search_kwargs:
+           k: 6
+         keyword_search_kwargs:
+           k: 6
+
+ post_retrieval:
+   delete_duplicate: False
+   with_rank: False
+   rerank:
+     type: 'CustomReranker'
+     model_path: '/home/public/llm/bge-reranker-large'
+     device_id: 'cuda:0'
+     threshold: 0.0
+
+ generate:
+   with_retrieval: True
+   chain_type: 'stuff'
+   # prompt_type: 'BASE_PROMPT'
+   prompt_type: 'CHAT_PROMPT'
+
+ metric:
+   type: 'bisheng-ragas'
+   question_column: '问题'
+   gt_column: 'GT'
+   answer_column: 'rag_answer'
+   query_type_column: '问题类型'
+   # metrics: ['answer_correctness_bisheng']
+   metrics: ['answer_recall_bisheng']
+   gt_split_column: 'gt_split_point'
+   batch_size: 5
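
Every config in this diff points its post_retrieval.rerank block at the bge-reranker-large weights. The package's CustomReranker lives in the new bisheng_langchain/rag/rerank/rerank.py, which is not shown here; the following is only a rough sketch of what a threshold-based cross-encoder rerank step looks like, assuming the sentence-transformers CrossEncoder API rather than the package's own implementation:

from sentence_transformers import CrossEncoder

def rerank_passages(query, passages,
                    model_path='/home/public/llm/bge-reranker-large',
                    device='cuda:0', threshold=0.0):
    # Score each (query, passage) pair with the cross-encoder, sort by score,
    # and keep only passages whose relevance score clears the threshold.
    model = CrossEncoder(model_path, device=device)
    scores = model.predict([(query, p) for p in passages])
    ranked = sorted(zip(passages, scores), key=lambda pair: pair[1], reverse=True)
    return [p for p, score in ranked if score >= threshold]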
bisheng_langchain/rag/config/baseline_caibao_knowledge_v2.yaml
@@ -0,0 +1,110 @@
+ data:
+   origin_file_path: '/home/public/rag_benchmark_finance_report'
+   question: '/home/public/rag_benchmark_finance_report/finance_report_data_100.xlsx'
+   save_answer: '/home/public/rag_benchmark_finance_report/finance_report_data_100_knowledge_command-r-plus_20chunk_chunk_size_1000.xlsx'
+
+ milvus:
+   host: '110.16.193.170'
+   port: '50062'
+   drop_old: True
+
+ elasticsearch:
+   url: 'http://110.16.193.170:50062/es'
+   ssl_verify:
+     basic_auth: ["elastic", "oSGL-zVvZ5P3Tm7qkDLC"]
+   drop_old: True
+
+ embedding:
+   type: 'OpenAIEmbeddings'
+   model: 'text-embedding-ada-002'
+   openai_api_key: ''
+   openai_proxy: ''
+
+ # chat_llm:
+ # type: 'ChatOpenAI'
+ # model: 'gpt-4-1106-preview'
+ # openai_api_key: ''
+ # openai_proxy: ''
+ # temperature: 0.0
+
+ chat_llm:
+   type: 'ChatCohere'
+   model: 'command-r-plus'
+   cohere_api_key: ''
+   max_tokens: 1000
+   temperature: 0.01
+
+ # chat_llm:
+ # type: 'ChatOpenAI'
+ # model: 'moonshot-v1-128k'
+ # openai_api_base: 'https://api.moonshot.cn/v1'
+ # openai_api_key : "Y21pamZpdWNwN2Zic3ZtdGJpdGc6bXNrLWZLNHp4VDMxMklsVU56MUxmOVNwY0RMeFMyaUg="
+ # openai_proxy: ''
+ # temperature: 0.01
+
+ # chat_llm:
+ # type: 'ChatQWen'
+ # model_name: 'qwen1.5-72b-chat'
+ # api_key: ''
+ # temperature: 0.01
+
+ loader:
+   type: 'ElemUnstructuredLoader'
+   unstructured_api_url: 'https://bisheng.dataelem.com/api/v1/etl4llm/predict'
+
+ retriever:
+   type: 'EnsembleRetriever' # 不动
+   suffix: 'benchmark_caibao_1000_knowledge_source_title'
+   retrievers:
+     - type: 'KeywordRetriever'
+       splitter:
+         text_splitter:
+           # type: 'ElemCharacterTextSplitter'
+           type: 'RecursiveCharacterTextSplitter'
+           chunk_size: 1000
+           chunk_overlap: 0
+           separators: ["\n\n"]
+       retrieval:
+         search_type: 'similarity'
+         search_kwargs:
+           k: 10
+     - type: 'BaselineVectorRetriever'
+       splitter:
+         text_splitter:
+           # type: 'ElemCharacterTextSplitter'
+           type: 'RecursiveCharacterTextSplitter'
+           chunk_size: 1000
+           chunk_overlap: 0
+           separators: ["\n\n"]
+       retrieval:
+         search_type: 'similarity'
+         search_kwargs:
+           k: 10
+
+ post_retrieval:
+   delete_duplicate: False
+   with_rank: False
+   rerank:
+     type: 'CustomReranker'
+     model_path: '/home/public/llm/bge-reranker-large'
+     device_id: 'cuda:0'
+     threshold: 0.0
+   sort_by_source_and_index: False
+
+ generate:
+   with_retrieval: True
+   max_content: 100000
+   chain_type: 'stuff'
+   # prompt_type: 'BASE_PROMPT'
+   prompt_type: 'CHAT_PROMPT'
+
+ metric:
+   type: 'bisheng-ragas'
+   question_column: '问题'
+   gt_column: 'GT'
+   answer_column: 'rag_answer'
+   query_type_column: '问题类型'
+   # metrics: ['answer_correctness_bisheng']
+   metrics: ['answer_recall_bisheng']
+   gt_split_column: 'gt_split_point'
+   batch_size: 5
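
The knowledge_v2 config replaces the single MixRetriever with an EnsembleRetriever over a keyword (Elasticsearch) retriever and a Milvus vector retriever. The package's own KeywordRetriever and BaselineVectorRetriever wrappers live in the new bisheng_langchain/rag/init_retrievers/ modules and are not shown in this diff; below is only a rough stand-in built from stock LangChain components, reusing the hosts and suffix from the config purely for illustration:

from langchain.embeddings import OpenAIEmbeddings
from langchain.retrievers import ElasticSearchBM25Retriever, EnsembleRetriever
from langchain.vectorstores import Milvus

embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')

# Dense retriever over a Milvus collection (the role BaselineVectorRetriever plays).
vector_retriever = Milvus(
    embedding_function=embeddings,
    collection_name='benchmark_caibao_1000_knowledge_source_title',
    connection_args={'host': '110.16.193.170', 'port': '50062'},
).as_retriever(search_type='similarity', search_kwargs={'k': 10})

# Sparse BM25 retriever over Elasticsearch (the role KeywordRetriever plays).
keyword_retriever = ElasticSearchBM25Retriever.create(
    elasticsearch_url='http://110.16.193.170:50062/es',
    index_name='benchmark_caibao_1000_knowledge_source_title',
)

retriever = EnsembleRetriever(retrievers=[keyword_retriever, vector_retriever],
                              weights=[0.5, 0.5])
docs = retriever.get_relevant_documents('What was the net profit in 2021?')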
bisheng_langchain/rag/config/baseline_caibao_v2.yaml
@@ -0,0 +1,112 @@
+ data:
+   origin_file_path: '/home/public/rag_benchmark_finance_report'
+   question: '/home/public/rag_benchmark_finance_report/finance_report_data_100_single.xlsx'
+   save_answer: '/home/public/rag_benchmark_finance_report/finance_report_data_100_single_command-r-plus_20chunk_chunk_size_1000_with_source_title_overlap100.xlsx'
+
+ milvus:
+   host: '110.16.193.170'
+   port: '50062'
+   drop_old: True
+
+ elasticsearch:
+   url: 'http://110.16.193.170:50062/es'
+   ssl_verify:
+     basic_auth: ["elastic", "oSGL-zVvZ5P3Tm7qkDLC"]
+   drop_old: True
+   extract_key_by_llm: False
+
+ embedding:
+   type: 'OpenAIEmbeddings'
+   model: 'text-embedding-ada-002'
+   openai_api_key: ''
+   openai_proxy: ''
+
+ # chat_llm:
+ # type: 'ChatOpenAI'
+ # model: 'gpt-4-1106-preview'
+ # openai_api_key: ''
+ # openai_proxy: ''
+ # temperature: 0.0
+
+ chat_llm:
+   type: 'ChatCohere'
+   model: 'command-r-plus'
+   cohere_api_key: ''
+   max_tokens: 1000
+   temperature: 0.01
+
+ # chat_llm:
+ # type: 'ChatQWen'
+ # model_name: 'qwen1.5-110b-chat'
+ # api_key: ''
+ # temperature: 0.01
+
+ # chat_llm:
+ # type: 'ChatOpenAI'
+ # model: 'qwen1.5-110b-chat'
+ # openai_api_base: 'http://60.31.21.42:12511/v1'
+ # openai_api_key : "Z9b8x3V7C2n0Q5T"
+ # openai_proxy: ''
+ # temperature: 0.01
+
+ loader:
+   type: 'ElemUnstructuredLoader'
+   unstructured_api_url: 'https://bisheng.dataelem.com/api/v1/etl4llm/predict'
+
+ retriever:
+   type: 'EnsembleRetriever' # 不动
+   suffix: 'benchmark_caibao_1000_source_title_overlap100'
+   add_aux_info: True
+   retrievers:
+     - type: 'KeywordRetriever'
+       splitter:
+         text_splitter:
+           # type: 'ElemCharacterTextSplitter'
+           type: 'RecursiveCharacterTextSplitter'
+           chunk_size: 1000
+           chunk_overlap: 100
+           separators: ["\n\n"]
+       retrieval:
+         search_type: 'similarity'
+         search_kwargs:
+           k: 10
+     - type: 'BaselineVectorRetriever'
+       splitter:
+         text_splitter:
+           # type: 'ElemCharacterTextSplitter'
+           type: 'RecursiveCharacterTextSplitter'
+           chunk_size: 1000
+           chunk_overlap: 100
+           separators: ["\n\n"]
+       retrieval:
+         search_type: 'similarity'
+         search_kwargs:
+           k: 10
+
+ post_retrieval:
+   delete_duplicate: False
+   with_rank: False
+   rerank:
+     type: 'CustomReranker'
+     model_path: '/home/public/llm/bge-reranker-large'
+     device_id: 'cuda:0'
+     threshold: 0.0
+   sort_by_source_and_index: False
+
+ generate:
+   with_retrieval: True
+   max_content: 100000
+   chain_type: 'stuff'
+   # prompt_type: 'BASE_PROMPT'
+   prompt_type: 'CHAT_PROMPT'
+
+ metric:
+   type: 'bisheng-ragas'
+   question_column: '问题'
+   gt_column: 'GT'
+   answer_column: 'rag_answer'
+   query_type_column: '问题类型'
+   # metrics: ['answer_correctness_bisheng']
+   metrics: ['answer_recall_bisheng']
+   gt_split_column: 'gt_split_point'
+   batch_size: 5
bisheng_langchain/rag/config/baseline_demo_v2.yaml
@@ -0,0 +1,92 @@
+ data:
+   origin_file_path: '/home/public/rag_benchmark_v1.0/rag_benchmark_processed'
+   question: '/home/gulixin/workspace/llm/bisheng/src/bisheng-langchain/experimental/rag/data/questions_info_with_answer_sample.xlsx'
+   save_answer: '/home/gulixin/workspace/llm/bisheng/src/bisheng-langchain/experimental/rag/data/questions_info_with_answer_sample_gpt4_10w.xlsx'
+
+ milvus:
+   host: '192.168.106.116'
+   port: '19530'
+   drop_old: True
+
+ elasticsearch:
+   url: 'http://192.168.106.116:9200'
+   ssl_verify:
+     basic_auth: ["elastic", "oSGL-zVvZ5P3Tm7qkDLC"]
+   drop_old: True
+
+ embedding:
+   type: 'OpenAIEmbeddings'
+   model: 'text-embedding-ada-002'
+   openai_api_key: ''
+   openai_proxy: 'http://192.168.106.20:1081'
+
+ chat_llm:
+   type: 'ChatOpenAI'
+   model: 'gpt-4-1106-preview'
+   openai_api_key: ''
+   openai_proxy: 'http://192.168.106.20:1081'
+   temperature: 0.0
+
+ # chat_llm:
+ # type: 'ChatQWen'
+ # model_name: 'qwen1.5-72b-chat'
+ # api_key: ''
+ # temperature: 0.01
+
+ loader:
+   type: 'ElemUnstructuredLoader'
+   unstructured_api_url: 'http://192.168.106.12:10001/v1/etl4llm/predict'
+
+ retriever:
+   type: 'EnsembleRetriever' # 不动
+   suffix: 'benchmark_demo_test'
+   retrievers:
+     - type: 'KeywordRetriever'
+       splitter:
+         text_splitter:
+           type: 'ElemCharacterTextSplitter'
+           chunk_size: 500
+           chunk_overlap: 0
+           separators: ["\n\n"]
+       retrieval:
+         search_type: 'similarity'
+         search_kwargs:
+           k: 10000
+     - type: 'BaselineVectorRetriever'
+       splitter:
+         text_splitter:
+           type: 'ElemCharacterTextSplitter'
+           chunk_size: 500
+           chunk_overlap: 0
+           separators: ["\n\n"]
+       retrieval:
+         search_type: 'similarity'
+         search_kwargs:
+           k: 10000
+
+ post_retrieval:
+   delete_duplicate: False
+   with_rank: False
+   rerank:
+     type: 'CustomReranker'
+     model_path: '/home/public/llm/bge-reranker-large'
+     device_id: 'cuda:0'
+     threshold: 0.0
+   sort_by_source_and_index: True
+
+ generate:
+   with_retrieval: True
+   max_content: 100000
+   chain_type: 'stuff'
+   # prompt_type: 'BASE_PROMPT'
+   prompt_type: 'CHAT_PROMPT'
+
+ metric:
+   type: 'bisheng-ragas'
+   question_column: '问题'
+   gt_column: 'GT'
+   answer_column: 'rag_answer'
+   query_type_column: '问题类型'
+   metrics: ['answer_correctness_bisheng']
+   # metrics: ['answer_recall_bisheng']
+   batch_size: 5
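
The splitter blocks in these configs are plain text-splitter constructors: RecursiveCharacterTextSplitter is the stock LangChain class used in the v2 finance configs, while ElemCharacterTextSplitter used above is bisheng_langchain's own variant and is not shown in this diff. For illustration, the 500/0 split on "\n\n" maps onto the stock class like this (the sample document is a stand-in for the loader output):

from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0, separators=['\n\n'])
docs = [Document(page_content='first section\n\nsecond section\n\nthird section')]  # stand-in for loader output
chunks = splitter.split_documents(docs)   # each chunk keeps the source document's metadata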
bisheng_langchain/rag/config/baseline_s2b_mix.yaml
@@ -0,0 +1,88 @@
+ data:
+   origin_file_path: '/home/public/rag_benchmark_v1.0/rag_benchmark_processed'
+   question: '/home/gulixin/workspace/llm/bisheng/src/bisheng-langchain/experimental/rag/data/questions_info_with_answer_sample.xlsx'
+   save_answer: '/home/gulixin/workspace/llm/bisheng/src/bisheng-langchain/experimental/rag/data/questions_info_with_answer_sample_gpt4_12chunk_s2b.xlsx'
+
+ milvus:
+   host: '192.168.106.116'
+   port: '19530'
+   drop_old: True
+
+ elasticsearch:
+   url: 'http://192.168.106.116:9200'
+   ssl_verify:
+     basic_auth: ["elastic", "oSGL-zVvZ5P3Tm7qkDLC"]
+   drop_old: True
+
+ embedding:
+   type: 'OpenAIEmbeddings'
+   model: 'text-embedding-ada-002'
+   openai_api_key: ''
+   openai_proxy: 'http://118.195.232.223:39995'
+
+ chat_llm:
+   type: 'ChatOpenAI'
+   model: 'gpt-4-1106-preview'
+   openai_api_key: ''
+   openai_proxy: 'http://118.195.232.223:39995'
+   temperature: 0.0
+
+ loader:
+   type: 'ElemUnstructuredLoader'
+   unstructured_api_url: 'http://192.168.106.12:10001/v1/etl4llm/predict'
+
+ retriever:
+   type: 'EnsembleRetriever' # 不动
+   suffix: 'test_s2b_mix'
+   retrievers:
+     - type: 'SmallerChunksVectorRetriever'
+       splitter:
+         parent_splitter:
+           type: 'ElemCharacterTextSplitter'
+           chunk_size: 1024
+           chunk_overlap: 200
+           separators: ["\n\n", "\n", " ", ""]
+         child_splitter:
+           type: 'RecursiveCharacterTextSplitter'
+           chunk_size: 216
+           chunk_overlap: 0
+           separators: ["\n\n", "\n", " ", ""]
+       retrieval:
+         search_type: 'similarity'
+         child_search_kwargs:
+           k: 12
+     - type: 'KeywordRetriever'
+       splitter:
+         text_splitter:
+           type: 'ElemCharacterTextSplitter'
+           chunk_size: 1024
+           chunk_overlap: 200
+           separators: ["\n\n", "\n", " ", ""]
+       retrieval:
+         search_type: 'similarity'
+         search_kwargs:
+           k: 6
+
+ post_retrieval:
+   delete_duplicate: True
+   with_rank: False
+   rerank:
+     type: 'CustomReranker'
+     model_path: '/home/public/llm/bge-reranker-large'
+     device_id: 'cuda:0'
+     threshold: 0.0
+
+ generate:
+   with_retrieval: True
+   chain_type: 'stuff'
+   # prompt_type: 'BASE_PROMPT'
+   prompt_type: 'CHAT_PROMPT'
+
+ metric:
+   type: 'bisheng-ragas'
+   question_column: '问题'
+   gt_column: 'GT'
+   answer_column: 'rag_answer'
+   query_type_column: '问题类型'
+   metrics: ['answer_correctness_bisheng']
+   batch_size: 5
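
The s2b ("small to big") config retrieves over small child chunks but returns their larger parent chunks. The package implements this in SmallerChunksVectorRetriever (bisheng_langchain/rag/init_retrievers/smaller_chunks_retriever.py), which is not reproduced in this diff; LangChain's stock ParentDocumentRetriever illustrates the same idea under the chunk sizes used above (the Chroma store and in-memory docstore are illustrative stand-ins):

from langchain.embeddings import OpenAIEmbeddings
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

parent_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=200)
child_splitter = RecursiveCharacterTextSplitter(chunk_size=216, chunk_overlap=0)

retriever = ParentDocumentRetriever(
    vectorstore=Chroma(collection_name='test_s2b_mix', embedding_function=OpenAIEmbeddings()),
    docstore=InMemoryStore(),              # holds the full parent chunks
    parent_splitter=parent_splitter,
    child_splitter=child_splitter,
    search_kwargs={'k': 12},               # child_search_kwargs in the config above
)
# retriever.add_documents(docs) indexes child chunks; queries return the parent chunks.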
bisheng_langchain/rag/config/baseline_v2.yaml
@@ -0,0 +1,90 @@
+ data:
+   origin_file_path: '/home/public/rag_benchmark_finance_report'
+   question: '/home/public/rag_benchmark_finance_report/finance_report_data_100_single.xlsx'
+   save_answer: '/home/public/rag_benchmark_finance_report/finance_report_data_100_single_qwen1.5_72b_20chunk_chunk_size_1000_with_source_title.xlsx'
+
+ milvus:
+   host: '110.16.193.170'
+   port: '50062'
+   drop_old: True
+
+ elasticsearch:
+   url: 'http://110.16.193.170:50062/es'
+   ssl_verify:
+     basic_auth: ["elastic", "oSGL-zVvZ5P3Tm7qkDLC"]
+   drop_old: True
+
+ embedding:
+   type: 'OpenAIEmbeddings'
+   model: 'text-embedding-ada-002'
+   openai_api_key: ''
+   openai_proxy: ''
+
+ chat_llm:
+   type: 'ChatOpenAI'
+   model: 'gpt-4-1106-preview'
+   openai_api_key: ''
+   openai_proxy: ''
+   temperature: 0.0
+
+ loader:
+   type: 'ElemUnstructuredLoader'
+   unstructured_api_url: 'https://bisheng.dataelem.com/api/v1/etl4llm/predict'
+
+ retriever:
+   type: 'EnsembleRetriever' # 不动
+   suffix: 'benchmark_caibao_1000_source_title'
+   add_aux_info: True
+   retrievers:
+     - type: 'KeywordRetriever'
+       splitter:
+         text_splitter:
+           # type: 'ElemCharacterTextSplitter'
+           type: 'RecursiveCharacterTextSplitter'
+           chunk_size: 1000
+           chunk_overlap: 0
+           separators: ["\n\n"]
+       retrieval:
+         search_type: 'similarity'
+         search_kwargs:
+           k: 100
+     - type: 'BaselineVectorRetriever'
+       splitter:
+         text_splitter:
+           # type: 'ElemCharacterTextSplitter'
+           type: 'RecursiveCharacterTextSplitter'
+           chunk_size: 1000
+           chunk_overlap: 0
+           separators: ["\n\n"]
+       retrieval:
+         search_type: 'similarity'
+         search_kwargs:
+           k: 100
+
+ post_retrieval:
+   delete_duplicate: False
+   with_rank: False
+   rerank:
+     type: 'CustomReranker'
+     model_path: '/home/public/llm/bge-reranker-large'
+     device_id: 'cuda:0'
+     threshold: 0.0
+   sort_by_source_and_index: True
+
+ generate:
+   with_retrieval: True
+   max_content: 15000
+   chain_type: 'stuff'
+   # prompt_type: 'BASE_PROMPT'
+   prompt_type: 'CHAT_PROMPT'
+
+ metric:
+   type: 'bisheng-ragas'
+   question_column: '问题'
+   gt_column: 'GT'
+   answer_column: 'rag_answer'
+   query_type_column: '问题类型'
+   # metrics: ['answer_correctness_bisheng']
+   metrics: ['answer_recall_bisheng']
+   gt_split_column: 'gt_split_point'
+   batch_size: 5
bisheng_langchain/rag/extract_info.py
@@ -0,0 +1,38 @@
+ import httpx
+ from langchain.chat_models import ChatOpenAI
+ from bisheng_langchain.chat_models import ChatQWen
+ from langchain.chains import LLMChain
+ from langchain.prompts.chat import (
+     ChatPromptTemplate,
+     SystemMessagePromptTemplate,
+     HumanMessagePromptTemplate,
+ )
+
+ system_template = """你是一个可靠标题生成或者提取助手。你会收到一篇文档的主要内容,请根据这些内容生成或者提取这篇文档的标题。"""
+ human_template = """
+ 文档内容如下:
+ {context}
+
+ 生成或提取的标题:
+ """
+
+ messages = [
+     SystemMessagePromptTemplate.from_template(system_template),
+     HumanMessagePromptTemplate.from_template(human_template),
+ ]
+ title_extract_prompt = ChatPromptTemplate.from_messages(messages)
+
+
+ def extract_title(llm, text, max_length=7000) -> str:
+     chain = LLMChain(llm=llm, prompt=title_extract_prompt)
+     ans = chain.run(context=text[:max_length])
+     return ans
+
+
+ if __name__ == '__main__':
+     llm = ChatQWen(model_name='qwen1.5-72b-chat',
+                    api_key='',
+                    temperature=0.01)
+     text = "江苏蔚蓝锂芯股份有限公司\n2021 年年度报告 \n2022 年 03 月\n\n 第一节 重要提示、目录和释义\n公司董事会、监事会及董事、监事、高级管理人员保证年度报告内容的真实、准确、完整,不存在虚假记载、误导性陈述或重大遗漏,并承担个别和连带的法律责任。\n公司负责人 CHEN KAI、主管会计工作负责人林文华及会计机构负责人(会计主管人员)张宗红声明:保证本年度报告中财务报告的真实、准确、完整。\n所有董事均已出席了审议本报告的董事会会议。"
+     ans = extract_title(llm, text)
+     print(ans)
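
extract_title accepts any chat model that LLMChain can wrap, so the ChatQWen instance in the module's __main__ block can be swapped for another LangChain chat model. A minimal usage sketch with ChatOpenAI (the API key and input file are placeholders):

from langchain.chat_models import ChatOpenAI
from bisheng_langchain.rag.extract_info import extract_title

llm = ChatOpenAI(model='gpt-4-1106-preview', openai_api_key='sk-...', temperature=0.0)
with open('report.txt', encoding='utf-8') as f:   # placeholder document
    text = f.read()
print(extract_title(llm, text))                    # returns the generated/extracted title string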