ragflow-sdk 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragflow_sdk-0.0.1/PKG-INFO +59 -0
- ragflow_sdk-0.0.1/README.md +41 -0
- ragflow_sdk-0.0.1/pyproject.toml +17 -0
- ragflow_sdk-0.0.1/ragflow_sdk/__init__.py +10 -0
- ragflow_sdk-0.0.1/ragflow_sdk/dataset.py +21 -0
- ragflow_sdk-0.0.1/ragflow_sdk/modules/__init__.py +0 -0
- ragflow_sdk-0.0.1/ragflow_sdk/modules/base.py +38 -0
- ragflow_sdk-0.0.1/ragflow_sdk/modules/chat.py +75 -0
- ragflow_sdk-0.0.1/ragflow_sdk/modules/chunk.py +27 -0
- ragflow_sdk-0.0.1/ragflow_sdk/modules/dataset.py +77 -0
- ragflow_sdk-0.0.1/ragflow_sdk/modules/document.py +77 -0
- ragflow_sdk-0.0.1/ragflow_sdk/modules/session.py +85 -0
- ragflow_sdk-0.0.1/ragflow_sdk/ragflow.py +181 -0
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: ragflow-sdk
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Python client sdk of [RAGFlow](https://github.com/infiniflow/ragflow). RAGFlow is an open-source RAG (Retrieval-Augmented Generation) engine based on deep document understanding.
|
|
5
|
+
License: Apache License, Version 2.0
|
|
6
|
+
Author: Zhichang Yu
|
|
7
|
+
Author-email: yuzhichang@gmail.com
|
|
8
|
+
Requires-Python: >=3.10,<4.0
|
|
9
|
+
Classifier: License :: Other/Proprietary License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Requires-Dist: requests (>=2.30.0,<3.0.0)
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
# python-ragflow
|
|
19
|
+
|
|
20
|
+
# update python client
|
|
21
|
+
|
|
22
|
+
- Update "version" field of [project] chapter
|
|
23
|
+
- Build new python SDK
|
|
24
|
+
- Upload to pypi.org
|
|
25
|
+
- Install new python SDK
|
|
26
|
+
|
|
27
|
+
# build python SDK
|
|
28
|
+
|
|
29
|
+
```shell
|
|
30
|
+
rm -f dist/* && poetry build
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
# install python SDK
|
|
34
|
+
```shell
|
|
35
|
+
pip uninstall -y ragflow-sdk && pip install dist/*.whl
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
This will install ragflow-sdk and its dependencies.
|
|
39
|
+
|
|
40
|
+
# upload to pypi.org
|
|
41
|
+
```shell
|
|
42
|
+
twine upload dist/*.whl
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Enter your pypi API token according to the prompt.
|
|
46
|
+
|
|
47
|
+
Note that pypi allows a version of a package [be uploaded only once](https://pypi.org/help/#file-name-reuse). You need to change the `version` inside the `pyproject.toml` before building and uploading.
|
|
48
|
+
|
|
49
|
+
# using
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
# For developer
|
|
56
|
+
```shell
|
|
57
|
+
pip install -e .
|
|
58
|
+
```
|
|
59
|
+
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# python-ragflow
|
|
2
|
+
|
|
3
|
+
# update python client
|
|
4
|
+
|
|
5
|
+
- Update "version" field of [project] chapter
|
|
6
|
+
- Build new python SDK
|
|
7
|
+
- Upload to pypi.org
|
|
8
|
+
- Install new python SDK
|
|
9
|
+
|
|
10
|
+
# build python SDK
|
|
11
|
+
|
|
12
|
+
```shell
|
|
13
|
+
rm -f dist/* && poetry build
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
# install python SDK
|
|
17
|
+
```shell
|
|
18
|
+
pip uninstall -y ragflow-sdk && pip install dist/*.whl
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
This will install ragflow-sdk and its dependencies.
|
|
22
|
+
|
|
23
|
+
# upload to pypi.org
|
|
24
|
+
```shell
|
|
25
|
+
twine upload dist/*.whl
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Enter your pypi API token according to the prompt.
|
|
29
|
+
|
|
30
|
+
Note that pypi allows a version of a package [be uploaded only once](https://pypi.org/help/#file-name-reuse). You need to change the `version` inside the `pyproject.toml` before building and uploading.
|
|
31
|
+
|
|
32
|
+
# using
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
# For developer
|
|
39
|
+
```shell
|
|
40
|
+
pip install -e .
|
|
41
|
+
```
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "ragflow-sdk"
|
|
3
|
+
version = "0.0.1"
|
|
4
|
+
description = "Python client sdk of [RAGFlow](https://github.com/infiniflow/ragflow). RAGFlow is an open-source RAG (Retrieval-Augmented Generation) engine based on deep document understanding."
|
|
5
|
+
authors = ["Zhichang Yu <yuzhichang@gmail.com>"]
|
|
6
|
+
license = "Apache License, Version 2.0"
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
package-mode = true
|
|
9
|
+
|
|
10
|
+
[tool.poetry.dependencies]
|
|
11
|
+
python = "^3.10"
|
|
12
|
+
requests = "^2.30.0"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
[build-system]
|
|
16
|
+
requires = ["poetry-core"]
|
|
17
|
+
build-backend = "poetry.core.masonry.api"
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import importlib.metadata

# Resolve the installed distribution's version at import time so that
# __version__ always matches the published package metadata.
# BUG FIX: the distribution is named "ragflow-sdk" (see pyproject.toml);
# looking up "ragflow" raises PackageNotFoundError on a clean install.
__version__ = importlib.metadata.version("ragflow-sdk")

from .ragflow import RAGFlow
from .modules.dataset import DataSet
from .modules.chat import Chat
from .modules.session import Session
from .modules.document import Document
from .modules.chunk import Chunk
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
class DataSet:
    """Legacy handle for a remote dataset, addressed by URL and UUID."""

    def __init__(self, user_key, dataset_url, uuid, name):
        # Credentials and endpoint used for subsequent API calls.
        self.user_key = user_key
        self.dataset_url = dataset_url
        # Server-side identifier plus human-readable label.
        self.uuid = uuid
        self.name = name
|
|
File without changes
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
class Base(object):
    """Dynamic wrapper around an API response dict.

    Nested dicts become nested ``Base`` instances; every other value is
    attached to the instance as a plain attribute.  Also proxies the HTTP
    verbs of the owning ``rag`` client.
    """

    def __init__(self, rag, res_dict):
        self.rag = rag
        for key, value in res_dict.items():
            # Recurse so nested objects also support attribute access.
            self.__dict__[key] = Base(rag, value) if isinstance(value, dict) else value

    def to_json(self):
        """Serialize the public, non-callable attributes back into a dict."""
        out = {}
        for attr in dir(self):
            if attr.startswith('__') or attr == "rag":
                continue
            val = getattr(self, attr)
            if callable(val):
                continue
            out[attr] = val.to_json() if isinstance(val, Base) else val
        return out

    def post(self, path, json=None, stream=False, files=None):
        """POST via the owning client; return the raw response."""
        return self.rag.post(path, json, stream=stream, files=files)

    def get(self, path, params=None):
        """GET via the owning client; return the raw response."""
        return self.rag.get(path, params)

    def rm(self, path, json):
        """DELETE via the owning client; return the raw response."""
        return self.rag.delete(path, json)

    def put(self, path, json):
        """PUT via the owning client; return the raw response."""
        return self.rag.put(path, json)

    def __str__(self):
        return str(self.to_json())
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from .base import Base
|
|
4
|
+
from .session import Session
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Chat(Base):
    """A chat assistant configured over one or more datasets."""

    class LLM(Base):
        """Model selection and sampling parameters for the assistant."""

        def __init__(self, rag, res_dict):
            # Defaults below are overwritten by matching server fields.
            self.model_name = "deepseek-chat"
            self.temperature = 0.1
            self.top_p = 0.3
            self.presence_penalty = 0.4
            self.frequency_penalty = 0.7
            self.max_tokens = 512
            super().__init__(rag, res_dict)

    class Prompt(Base):
        """Retrieval and prompting configuration for the assistant."""

        def __init__(self, rag, res_dict):
            # Defaults below are overwritten by matching server fields.
            self.similarity_threshold = 0.2
            self.keywords_similarity_weight = 0.7
            self.top_n = 8
            self.variables = [{"key": "knowledge", "optional": True}]
            self.rerank_model = None
            self.empty_response = None
            self.opener = "Hi! I'm your assistant, what can I do for you?"
            self.show_quote = True
            self.prompt = (
                "You are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. "
                "Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, "
                "your answer must include the sentence 'The answer you are looking for is not found in the knowledge base!' "
                "Answers need to consider chat history.\nHere is the knowledge base:\n{knowledge}\nThe above is the knowledge base."
            )
            super().__init__(rag, res_dict)

    def __init__(self, rag, res_dict):
        # Defaults below are overwritten by matching server fields.
        self.id = ""
        self.name = "assistant"
        self.avatar = "path/to/avatar"
        self.dataset_ids = ["kb1"]
        self.llm = Chat.LLM(rag, {})
        self.prompt = Chat.Prompt(rag, {})
        super().__init__(rag, res_dict)

    def update(self, update_message: dict):
        """Apply *update_message* to this chat on the server."""
        response = self.put(f'/chats/{self.id}', update_message).json()
        if response.get("code") != 0:
            raise Exception(response["message"])

    def create_session(self, name: str = "New session") -> Session:
        """Create a new session under this chat and return its handle."""
        response = self.post(f"/chats/{self.id}/sessions", {"name": name}).json()
        if response.get("code") == 0:
            return Session(self.rag, response['data'])
        raise Exception(response["message"])

    def list_sessions(self, page: int = 1, page_size: int = 1024, orderby: str = "create_time", desc: bool = True,
                      id: str = None, name: str = None) -> List[Session]:
        """List this chat's sessions, paged and ordered as requested."""
        query = {"page": page, "page_size": page_size, "orderby": orderby,
                 "desc": desc, "id": id, "name": name}
        response = self.get(f'/chats/{self.id}/sessions', query).json()
        if response.get("code") == 0:
            return [Session(self.rag, item) for item in response["data"]]
        raise Exception(response["message"])

    def delete_sessions(self, ids: List[str] = None):
        """Delete the sessions with the given ids from this chat."""
        response = self.rm(f"/chats/{self.id}/sessions", {"ids": ids}).json()
        if response.get("code") != 0:
            raise Exception(response.get("message"))
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from .base import Base
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Chunk(Base):
    """One chunk of a parsed document, as returned by the API."""

    def __init__(self, rag, res_dict):
        # Defaults below are overwritten by matching server fields.
        self.id = ""
        self.content = ""
        self.important_keywords = []
        self.create_time = ""
        self.create_timestamp = 0.0
        self.dataset_id = None
        self.document_name = ""
        self.document_id = ""
        self.available = True
        # Discard any server fields this client does not model.
        for key in list(res_dict.keys()):
            if key not in self.__dict__:
                res_dict.pop(key)
        super().__init__(rag, res_dict)

    def update(self, update_message: dict):
        """Apply *update_message* to this chunk on the server."""
        response = self.put(
            f"/datasets/{self.dataset_id}/documents/{self.document_id}/chunks/{self.id}",
            update_message,
        ).json()
        if response.get("code") != 0:
            raise Exception(response["message"])
|
|
26
|
+
|
|
27
|
+
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from .document import Document
|
|
4
|
+
|
|
5
|
+
from .base import Base
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class DataSet(Base):
    """A knowledge-base dataset plus the document operations scoped to it."""

    class ParserConfig(Base):
        """Free-form parser configuration; fields mirror the server response."""

        def __init__(self, rag, res_dict):
            super().__init__(rag, res_dict)

    def __init__(self, rag, res_dict):
        # Defaults below are overwritten by matching server fields.
        self.id = ""
        self.name = ""
        self.avatar = ""
        self.tenant_id = None
        self.description = ""
        self.language = "English"
        self.embedding_model = ""
        self.permission = "me"
        self.document_count = 0
        self.chunk_count = 0
        self.chunk_method = "naive"
        self.parser_config = None
        # Discard any server fields this client does not model.
        for key in list(res_dict.keys()):
            if key not in self.__dict__:
                res_dict.pop(key)
        super().__init__(rag, res_dict)

    def update(self, update_message: dict):
        """Apply *update_message* to this dataset on the server."""
        response = self.put(f'/datasets/{self.id}', update_message).json()
        if response.get("code") != 0:
            raise Exception(response["message"])

    def upload_documents(self, document_list: List[dict]):
        """Upload files; each dict needs ``displayed_name`` and ``blob``."""
        endpoint = f"/datasets/{self.id}/documents"
        payload = [("file", (item["displayed_name"], item["blob"])) for item in document_list]
        response = self.post(path=endpoint, json=None, files=payload).json()
        if response.get("code") == 0:
            return [Document(self.rag, doc) for doc in response["data"]]
        raise Exception(response.get("message"))

    def list_documents(self, id: str = None, keywords: str = None, offset: int = 1, limit: int = 1024,
                       orderby: str = "create_time", desc: bool = True):
        """List documents in this dataset, filtered and paged as requested."""
        query = {"id": id, "keywords": keywords, "offset": offset,
                 "limit": limit, "orderby": orderby, "desc": desc}
        response = self.get(f"/datasets/{self.id}/documents", params=query).json()
        if response.get("code") == 0:
            return [Document(self.rag, doc) for doc in response["data"].get("docs")]
        raise Exception(response["message"])

    def delete_documents(self, ids: List[str] = None):
        """Delete the documents with the given ids from this dataset."""
        response = self.rm(f"/datasets/{self.id}/documents", {"ids": ids}).json()
        if response.get("code") != 0:
            raise Exception(response["message"])

    def async_parse_documents(self, document_ids):
        """Start asynchronous chunking of the given documents."""
        response = self.post(f"/datasets/{self.id}/chunks", {"document_ids": document_ids}).json()
        if response.get("code") != 0:
            raise Exception(response.get("message"))

    def async_cancel_parse_documents(self, document_ids):
        """Cancel asynchronous chunking of the given documents."""
        response = self.rm(f"/datasets/{self.id}/chunks", {"document_ids": document_ids}).json()
        if response.get("code") != 0:
            raise Exception(response.get("message"))
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from .base import Base
|
|
3
|
+
from .chunk import Chunk
|
|
4
|
+
from typing import List
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Document(Base):
    """A document inside a dataset, plus its chunk-level operations."""

    class ParserConfig(Base):
        """Free-form parser configuration; fields mirror the server response."""

        def __init__(self, rag, res_dict):
            super().__init__(rag, res_dict)

    def __init__(self, rag, res_dict):
        # Defaults below are overwritten by matching server fields.
        self.id = ""
        self.name = ""
        self.thumbnail = None
        self.dataset_id = None
        self.chunk_method = "naive"
        self.parser_config = {"pages": [[1, 1000000]]}
        self.source_type = "local"
        self.type = ""
        self.created_by = ""
        self.size = 0
        self.token_count = 0
        self.chunk_count = 0
        self.progress = 0.0
        self.progress_msg = ""
        self.process_begin_at = None
        self.process_duration = 0.0
        self.run = "0"
        self.status = "1"
        # Discard any server fields this client does not model.
        for k in list(res_dict.keys()):
            if k not in self.__dict__:
                res_dict.pop(k)
        super().__init__(rag, res_dict)

    def update(self, update_message: dict):
        """Apply *update_message* to this document on the server."""
        res = self.put(f'/datasets/{self.dataset_id}/documents/{self.id}',
                       update_message)
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res["message"])

    def download(self):
        """Download this document's raw bytes.

        The server answers with the file content on success and with a JSON
        error object on failure, so a successful JSON parse means failure.
        """
        res = self.get(f"/datasets/{self.dataset_id}/documents/{self.id}")
        try:
            res = res.json()
            raise Exception(res.get("message"))
        except json.JSONDecodeError:
            return res.content

    def list_chunks(self, offset=0, limit=30, keywords="", id: str = None):
        """List this document's chunks, filtered and paged as requested."""
        params = {"document_id": self.id, "keywords": keywords,
                  "offset": offset, "limit": limit, "id": id}
        res = self.get(f'/datasets/{self.dataset_id}/documents/{self.id}/chunks', params)
        res = res.json()
        if res.get("code") == 0:
            return [Chunk(self.rag, data) for data in res["data"].get("chunks")]
        raise Exception(res.get("message"))

    def add_chunk(self, content: str, important_keywords: List[str] = None):
        """Create a chunk with *content* and an optional keyword list.

        ``important_keywords`` defaults to an empty list; ``None`` replaces
        the old mutable default ``[]`` (shared across calls) and behaves
        identically for callers.
        """
        if important_keywords is None:
            important_keywords = []
        res = self.post(f'/datasets/{self.dataset_id}/documents/{self.id}/chunks',
                        {"content": content, "important_keywords": important_keywords})
        res = res.json()
        if res.get("code") == 0:
            return Chunk(self.rag, res["data"].get("chunk"))
        raise Exception(res.get("message"))

    def delete_chunks(self, ids: List[str] = None):
        """Delete the chunks with the given ids from this document."""
        # BUG FIX: the path previously lacked its leading "/", producing
        # ".../api/v1datasets/..." once concatenated with the client's
        # base URL (RAGFlow joins with api_url + path, no separator added).
        res = self.rm(f"/datasets/{self.dataset_id}/documents/{self.id}/chunks", {"ids": ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
from .base import Base
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Session(Base):
    # A conversation under one chat assistant; ``ask`` streams answers back.
    def __init__(self, rag, res_dict):
        # Defaults below are overwritten by matching fields in res_dict.
        self.id = None
        self.name = "New session"
        self.messages = [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
        self.chat_id = None
        super().__init__(rag, res_dict)

    def ask(self, question: str, stream: bool = False):
        # Generator yielding one Message per server-sent answer fragment.
        # Strip any references attached by previous answers before resending.
        for message in self.messages:
            if "reference" in message:
                message.pop("reference")
        # NOTE(review): the request body hard-codes "stream": True; the
        # ``stream`` parameter only controls requests-level streaming of
        # the HTTP response — confirm this asymmetry is intended.
        res = self.post(f"/chats/{self.chat_id}/completions",
                        {"question": question, "stream": True, "session_id": self.id}, stream=stream)
        for line in res.iter_lines():
            line = line.decode("utf-8")
            # A bare JSON object (no "data:" prefix) signals a server error.
            if line.startswith("{"):
                json_data = json.loads(line)
                raise Exception(json_data["message"])
            if line.startswith("data:"):
                json_data = json.loads(line[5:])
                # A payload of {"data": true} carries no answer and is
                # skipped — presumably the stream terminator; verify.
                if json_data["data"] != True:
                    answer = json_data["data"]["answer"]
                    reference = json_data["data"]["reference"]
                    temp_dict = {
                        "content": answer,
                        "role": "assistant"
                    }
                    if "chunks" in reference:
                        chunks = reference["chunks"]
                        chunk_list = []
                        # Re-key the server's chunk fields to the SDK's names.
                        for chunk in chunks:
                            new_chunk = {
                                "id": chunk["chunk_id"],
                                "content": chunk["content_with_weight"],
                                "document_id": chunk["doc_id"],
                                "document_name": chunk["docnm_kwd"],
                                "dataset_id": chunk["kb_id"],
                                "image_id": chunk["img_id"],
                                "similarity": chunk["similarity"],
                                "vector_similarity": chunk["vector_similarity"],
                                "term_similarity": chunk["term_similarity"],
                                "positions": chunk["positions"],
                            }
                            chunk_list.append(new_chunk)
                        temp_dict["reference"] = chunk_list
                    message = Message(self.rag, temp_dict)
                    yield message

    def update(self, update_message):
        # Apply *update_message* to this session on the server.
        res = self.put(f"/chats/{self.chat_id}/sessions/{self.id}",
                       update_message)
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))
|
|
61
|
+
|
|
62
|
+
class Message(Base):
    """One message in a session, optionally carrying retrieval references."""

    def __init__(self, rag, res_dict):
        # Defaults below are overwritten by matching fields in res_dict.
        self.content = "Hi! I am your assistant,can I help you?"
        self.reference = None
        self.role = "assistant"
        self.prompt = None
        self.id = None
        super().__init__(rag, res_dict)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class Chunk(Base):
    """A retrieved chunk attached to an answer, with similarity scores.

    Note: distinct from ``modules.chunk.Chunk`` — this variant models the
    fields returned inside a completion's reference list.
    """

    def __init__(self, rag, res_dict):
        # Defaults below are overwritten by matching fields in res_dict.
        defaults = {
            "id": None,
            "content": None,
            "document_id": "",
            "document_name": "",
            "dataset_id": "",
            "image_id": "",
            "similarity": None,
            "vector_similarity": None,
            "term_similarity": None,
            "positions": None,
        }
        self.__dict__.update(defaults)
        super().__init__(rag, res_dict)
|
|
85
|
+
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
from typing import List
|
|
17
|
+
|
|
18
|
+
import requests
|
|
19
|
+
|
|
20
|
+
from .modules.chat import Chat
|
|
21
|
+
from .modules.chunk import Chunk
|
|
22
|
+
from .modules.dataset import DataSet
|
|
23
|
+
from .modules.document import Document
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class RAGFlow:
    """HTTP client for the RAGFlow REST API (datasets, chats, retrieval).

    Wraps ``requests`` with bearer-token authentication.  The typed helpers
    raise ``Exception`` with the server's message whenever the response
    carries a non-zero ``code``.
    """

    def __init__(self, api_key, base_url, version='v1'):
        """
        api_url: http://<host_address>/api/v1
        """
        self.user_key = api_key
        self.api_url = f"{base_url}/api/{version}"
        self.authorization_header = {"Authorization": "{} {}".format("Bearer", self.user_key)}

    def post(self, path, json=None, stream=False, files=None):
        """POST *json*/*files* to *path*; return the raw response."""
        res = requests.post(url=self.api_url + path, json=json, headers=self.authorization_header,
                            stream=stream, files=files)
        return res

    def get(self, path, params=None, json=None):
        """GET *path* with optional query params / body; return the raw response."""
        res = requests.get(url=self.api_url + path, params=params,
                           headers=self.authorization_header, json=json)
        return res

    def delete(self, path, json):
        """DELETE *path* with a JSON body; return the raw response."""
        res = requests.delete(url=self.api_url + path, json=json, headers=self.authorization_header)
        return res

    def put(self, path, json):
        """PUT a JSON body to *path*; return the raw response."""
        res = requests.put(url=self.api_url + path, json=json, headers=self.authorization_header)
        return res

    def create_dataset(self, name: str, avatar: str = "", description: str = "", language: str = "English",
                       permission: str = "me", chunk_method: str = "naive",
                       parser_config: DataSet.ParserConfig = None) -> DataSet:
        """Create a dataset and return its handle; raises on server error."""
        if parser_config:
            parser_config = parser_config.to_json()
        res = self.post("/datasets",
                        {"name": name, "avatar": avatar, "description": description, "language": language,
                         "permission": permission, "chunk_method": chunk_method,
                         "parser_config": parser_config})
        res = res.json()
        if res.get("code") == 0:
            return DataSet(self, res["data"])
        raise Exception(res["message"])

    def delete_datasets(self, ids: List[str] = None):
        """Delete the datasets with the given ids; raises on server error."""
        res = self.delete("/datasets", {"ids": ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res["message"])

    def get_dataset(self, name: str):
        """Return the first dataset matching *name*; raise if none exists."""
        matches = self.list_datasets(name=name)
        if matches:
            return matches[0]
        raise Exception("Dataset %s not found" % name)

    def list_datasets(self, page: int = 1, page_size: int = 1024, orderby: str = "create_time", desc: bool = True,
                      id: str = None, name: str = None) -> List[DataSet]:
        """List datasets, paged and filtered as requested."""
        res = self.get("/datasets",
                       {"page": page, "page_size": page_size, "orderby": orderby,
                        "desc": desc, "id": id, "name": name})
        res = res.json()
        if res.get("code") == 0:
            return [DataSet(self, data) for data in res['data']]
        raise Exception(res["message"])

    def create_chat(self, name: str, avatar: str = "", dataset_ids: List[str] = None,
                    llm: Chat.LLM = None, prompt: Chat.Prompt = None) -> Chat:
        """Create a chat assistant over *dataset_ids* and return it.

        ``dataset_ids`` defaults to an empty list; ``None`` replaces the old
        mutable default ``[]`` (shared across calls) with identical behavior.
        Omitted ``llm`` / ``prompt`` settings are filled with server-friendly
        defaults before the request is sent.
        """
        dataset_list = list(dataset_ids) if dataset_ids else []

        if llm is None:
            llm = Chat.LLM(self, {"model_name": None,
                                  "temperature": 0.1,
                                  "top_p": 0.3,
                                  "presence_penalty": 0.4,
                                  "frequency_penalty": 0.7,
                                  "max_tokens": 512, })
        if prompt is None:
            prompt = Chat.Prompt(self, {"similarity_threshold": 0.2,
                                        "keywords_similarity_weight": 0.7,
                                        "top_n": 8,
                                        "variables": [{
                                            "key": "knowledge",
                                            "optional": True
                                        }], "rerank_model": "",
                                        "empty_response": None,
                                        "opener": None,
                                        "show_quote": True,
                                        "prompt": None})
        if prompt.opener is None:
            prompt.opener = "Hi! I'm your assistant, what can I do for you?"
        if prompt.prompt is None:
            prompt.prompt = (
                "You are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. "
                "Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, "
                "your answer must include the sentence 'The answer you are looking for is not found in the knowledge base!' "
                "Answers need to consider chat history.\nHere is the knowledge base:\n{knowledge}\nThe above is the knowledge base."
            )

        temp_dict = {"name": name,
                     "avatar": avatar,
                     "dataset_ids": dataset_list,
                     "llm": llm.to_json(),
                     "prompt": prompt.to_json()}
        res = self.post("/chats", temp_dict)
        res = res.json()
        if res.get("code") == 0:
            return Chat(self, res["data"])
        raise Exception(res["message"])

    def delete_chats(self, ids: List[str] = None):
        """Delete the chats with the given ids; raises on server error.

        The stale ``-> bool`` annotation was dropped: the method has always
        returned ``None`` on success.
        """
        res = self.delete('/chats', {"ids": ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res["message"])

    def list_chats(self, page: int = 1, page_size: int = 1024, orderby: str = "create_time", desc: bool = True,
                   id: str = None, name: str = None) -> List[Chat]:
        """List chat assistants, paged and filtered as requested."""
        res = self.get("/chats",
                       {"page": page, "page_size": page_size, "orderby": orderby,
                        "desc": desc, "id": id, "name": name})
        res = res.json()
        if res.get("code") == 0:
            return [Chat(self, data) for data in res['data']]
        raise Exception(res["message"])

    def retrieve(self, dataset_ids, document_ids=None, question="", offset=1, limit=1024,
                 similarity_threshold=0.2, vector_similarity_weight=0.3, top_k=1024,
                 rerank_id: str = None, keyword: bool = False, ):
        """Retrieve chunks matching *question* across the given datasets."""
        if document_ids is None:
            document_ids = []
        data_json = {
            "offset": offset,
            "limit": limit,
            "similarity_threshold": similarity_threshold,
            "vector_similarity_weight": vector_similarity_weight,
            "top_k": top_k,
            "rerank_id": rerank_id,
            "keyword": keyword,
            "question": question,
            "datasets": dataset_ids,
            "documents": document_ids
        }
        res = self.post('/retrieval', json=data_json)
        res = res.json()
        if res.get("code") == 0:
            return [Chunk(self, chunk_data) for chunk_data in res["data"].get("chunks")]
        raise Exception(res.get("message"))
|