hjxdl-0.2.18-py3-none-any.whl → hjxdl-0.2.20-py3-none-any.whl
This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
- hdl/_version.py +2 -2
- hdl/utils/llm/visrag.py +2 -2
- {hjxdl-0.2.18.dist-info → hjxdl-0.2.20.dist-info}/METADATA +1 -1
- {hjxdl-0.2.18.dist-info → hjxdl-0.2.20.dist-info}/RECORD +6 -7
- hdl/utils/llm/ocrrag.py +0 -225
- {hjxdl-0.2.18.dist-info → hjxdl-0.2.20.dist-info}/WHEEL +0 -0
- {hjxdl-0.2.18.dist-info → hjxdl-0.2.20.dist-info}/top_level.txt +0 -0
hdl/_version.py
CHANGED
hdl/utils/llm/visrag.py
CHANGED
@@ -198,7 +198,7 @@ if __name__ == '__main__':
     )
 
     with gr.Blocks() as app:
-        gr.Markdown("#
+        gr.Markdown("# RAG-PDFQA: Two Vision Language Models Enable End-to-End RAG")
 
         file_input = gr.File(type="binary", label="Step 1: Upload PDF")
         file_result = gr.Text(label="Knowledge Base ID")
@@ -217,7 +217,7 @@ if __name__ == '__main__':
                               inputs=[kb_id_input, query_input, topk_input], outputs=images_output)
 
         button = gr.Button("Answer Question")
-        gen_model_response = gr.Textbox(label="
+        gen_model_response = gr.Textbox(label="Answer")
 
         button.click(lambda images, question: answer_question(images, question, gen_model),
                      inputs=[images_output, query_input], outputs=gen_model_response)
{hjxdl-0.2.18.dist-info → hjxdl-0.2.20.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
 hdl/__init__.py,sha256=GffnD0jLJdhkd-vo989v40N90sQbofkayRBwxc6TVhQ,72
-hdl/_version.py,sha256=
+hdl/_version.py,sha256=a4xkHHagVd1Q5fDYBTP_mD5TwWjJASwGjEaI_TLYo5w,413
 hdl/args/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hdl/args/loss_args.py,sha256=s7YzSdd7IjD24rZvvOrxLLFqMZQb9YylxKeyelSdrTk,70
 hdl/controllers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -132,14 +132,13 @@ hdl/utils/llm/chatgr.py,sha256=GO2G7g6YybduA5VCUuGjvEsJfC_6L7rycSnPeHMcxyM,2820
 hdl/utils/llm/embs.py,sha256=Tf0FOYrOFZp7qQpEPiSCXzlgyHH0X9HVTUtsup74a9E,7174
 hdl/utils/llm/extract.py,sha256=2sK_WJzmYIc8iuWaM9DA6Nw3_6q1O4lJ5pKpcZo-bBA,6512
 hdl/utils/llm/llama_chat.py,sha256=watcHGOaz-bv3x-yDucYlGk5f8FiqfFhwWogrl334fk,4387
-hdl/utils/llm/ocrrag.py,sha256=rwCfIzgHCxPBaG0JkzJiC-4Ci3NjMx5h5OYVxL1EmSg,9021
 hdl/utils/llm/vis.py,sha256=-6QvxSVzKqxLh_l0aYg2wN2G5HOiQvCpfp-jn9twXw0,16210
-hdl/utils/llm/visrag.py,sha256=
+hdl/utils/llm/visrag.py,sha256=_PuKtmQIXD5bnmXwDWhTLdzOhgC42JiqdMNb1uKA7n8,9190
 hdl/utils/schedulers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hdl/utils/schedulers/norm_lr.py,sha256=bDwCmdEK-WkgxQMFBiMuchv8Mm7C0-GZJ6usm-PQk14,4461
 hdl/utils/weather/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hdl/utils/weather/weather.py,sha256=k11o6wM15kF8b9NMlEfrg68ak-SfSYLN3nOOflFUv-I,4381
-hjxdl-0.2.
-hjxdl-0.2.
-hjxdl-0.2.
-hjxdl-0.2.
+hjxdl-0.2.20.dist-info/METADATA,sha256=DuluBVeDJWGaS5su2wfbIGZpiifkMoaC6E4h13137G0,836
+hjxdl-0.2.20.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
+hjxdl-0.2.20.dist-info/top_level.txt,sha256=-kxwTM5JPhylp06z3zAVO3w6_h7wtBfBo2zgM6YZoTk,4
+hjxdl-0.2.20.dist-info/RECORD,,
hdl/utils/llm/ocrrag.py
DELETED
@@ -1,225 +0,0 @@
-import argparse
-from PIL import Image
-import hashlib
-import torch
-import fitz
-import gradio as gr
-import os
-import numpy as np
-import json
-from transformers import AutoModel, AutoTokenizer
-
-from .chat import OpenAI_M
-from .vis import pilimg_to_base64
-
-def get_image_md5(img: Image.Image):
-    img_byte_array = img.tobytes()
-    hash_md5 = hashlib.md5()
-    hash_md5.update(img_byte_array)
-    hex_digest = hash_md5.hexdigest()
-    return hex_digest
-
-def calculate_md5_from_binary(binary_data):
-    hash_md5 = hashlib.md5()
-    hash_md5.update(binary_data)
-    return hash_md5.hexdigest()
-
-def add_pdf_gradio(pdf_file_binary, progress=gr.Progress(), cache_dir=None, model=None, tokenizer=None):
-    model.eval()
-
-    knowledge_base_name = calculate_md5_from_binary(pdf_file_binary)
-
-    this_cache_dir = os.path.join(cache_dir, knowledge_base_name)
-    os.makedirs(this_cache_dir, exist_ok=True)
-
-    with open(os.path.join(this_cache_dir, f"src.pdf"), 'wb') as file:
-        file.write(pdf_file_binary)
-
-    dpi = 200
-    doc = fitz.open("pdf", pdf_file_binary)
-
-    reps_list = []
-    images = []
-    image_md5s = []
-
-    for page in progress.tqdm(doc):
-        pix = page.get_pixmap(dpi=dpi)
-        image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
-        image_md5 = get_image_md5(image)
-        image_md5s.append(image_md5)
-        with torch.no_grad():
-            reps = model(text=[''], image=[image], tokenizer=tokenizer).reps
-        reps_list.append(reps.squeeze(0).cpu().numpy())
-        images.append(image)
-
-    for idx in range(len(images)):
-        image = images[idx]
-        image_md5 = image_md5s[idx]
-        cache_image_path = os.path.join(this_cache_dir, f"{image_md5}.png")
-        image.save(cache_image_path)
-
-    np.save(os.path.join(this_cache_dir, f"reps.npy"), reps_list)
-
-    with open(os.path.join(this_cache_dir, f"md5s.txt"), 'w') as f:
-        for item in image_md5s:
-            f.write(item+'\n')
-
-    return knowledge_base_name
-
-def retrieve_gradio(knowledge_base, query, topk, cache_dir=None, model=None, tokenizer=None):
-    model.eval()
-
-    target_cache_dir = os.path.join(cache_dir, knowledge_base)
-
-    if not os.path.exists(target_cache_dir):
-        return None
-
-    md5s = []
-    with open(os.path.join(target_cache_dir, f"md5s.txt"), 'r') as f:
-        for line in f:
-            md5s.append(line.rstrip('\n'))
-
-    doc_reps = np.load(os.path.join(target_cache_dir, f"reps.npy"))
-
-    query_with_instruction = "Represent this query for retrieving relevant document: " + query
-    with torch.no_grad():
-        query_rep = model(text=[query_with_instruction], image=[None], tokenizer=tokenizer).reps.squeeze(0).cpu()
-
-    query_md5 = hashlib.md5(query.encode()).hexdigest()
-
-    doc_reps_cat = torch.stack([torch.Tensor(i) for i in doc_reps], dim=0)
-
-    similarities = torch.matmul(query_rep, doc_reps_cat.T)
-
-    topk_values, topk_doc_ids = torch.topk(similarities, k=topk)
-
-    images_topk = [Image.open(os.path.join(target_cache_dir, f"{md5s[idx]}.png")) for idx in topk_doc_ids.cpu().numpy()]
-
-    with open(os.path.join(target_cache_dir, f"q-{query_md5}.json"), 'w') as f:
-        f.write(json.dumps(
-            {
-                "knowledge_base": knowledge_base,
-                "query": query,
-                "retrieved_docs": [os.path.join(target_cache_dir, f"{md5s[idx]}.png") for idx in topk_doc_ids.cpu().numpy()]
-            }, indent=4, ensure_ascii=False
-        ))
-
-    return images_topk
-
-def answer_question_stream(images, question, gen_model):
-    # Load images from the image paths in images[0]
-    pil_images = [Image.open(image[0]).convert('RGB') for image in images]
-
-    # Calculate the total size of the new image (for vertical concatenation)
-    widths, heights = zip(*(img.size for img in pil_images))
-
-    # Assuming vertical concatenation, so width is the max width, height is the sum of heights
-    total_width = max(widths)
-    total_height = sum(heights)
-
-    # Create a new blank image with the total width and height
-    new_image = Image.new('RGB', (total_width, total_height))
-
-    # Paste each image into the new image
-    y_offset = 0
-    for img in pil_images:
-        new_image.paste(img, (0, y_offset))
-        y_offset += img.height  # Move the offset down by the height of the image
-
-    # Convert the concatenated image to base64
-    new_image_base64 = pilimg_to_base64(new_image)
-
-    # Stream the answer from the model
-    partial_answer_text = ""
-    for partial_answer in gen_model.chat(
-        prompt=question,
-        images=[new_image_base64],  # Use the concatenated image
-        stream=True  # Enable streaming
-    ):
-        partial_answer_text += partial_answer
-        yield gr.update(value=partial_answer_text)
-
-def upvote(knowledge_base, query, cache_dir):
-    target_cache_dir = os.path.join(cache_dir, knowledge_base)
-    query_md5 = hashlib.md5(query.encode()).hexdigest()
-
-    with open(os.path.join(target_cache_dir, f"q-{query_md5}.json"), 'r') as f:
-        data = json.loads(f.read())
-
-    data["user_preference"] = "upvote"
-
-    with open(os.path.join(target_cache_dir, f"q-{query_md5}-withpref.json"), 'w') as f:
-        f.write(json.dumps(data, indent=4, ensure_ascii=False))
-
-def downvote(knowledge_base, query, cache_dir):
-    target_cache_dir = os.path.join(cache_dir, knowledge_base)
-    query_md5 = hashlib.md5(query.encode()).hexdigest()
-
-    with open(os.path.join(target_cache_dir, f"q-{query_md5}.json"), 'r') as f:
-        data = json.loads(f.read())
-
-    data["user_preference"] = "downvote"
-
-    with open(os.path.join(target_cache_dir, f"q-{query_md5}-withpref.json"), 'w') as f:
-        f.write(json.dumps(data, indent=4, ensure_ascii=False))
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description="MiniCPMV-RAG-PDFQA Script")
-    parser.add_argument('--cache-dir', dest='cache_dir', type=str, required=True, help='Cache directory path')
-    parser.add_argument('--device', dest='device', type=str, default='cuda:0', help='Device for model inference')
-    parser.add_argument('--model-path', dest='model_path', type=str, required=True, help='Path to the embedding model')
-    parser.add_argument('--llm-host', dest='llm_host', type=str, default='127.0.0.1', help='LLM server IP address')
-    parser.add_argument('--llm-port', dest='llm_port', type=int, default=22299, help='LLM server port')
-    parser.add_argument('--server-name', dest='server_name', type=str, default='0.0.0.0', help='Gradio server name')
-    parser.add_argument('--server-port', dest='server_port', type=int, default=10077, help='Gradio server port')
-
-    args = parser.parse_args()
-
-    print("Loading embedding model...")
-    tokenizer = AutoTokenizer.from_pretrained(args.model_path, trust_remote_code=True)
-    model = AutoModel.from_pretrained(args.model_path, trust_remote_code=True)
-    model.to(args.device)
-    model.eval()
-    print("Embedding model loaded!")
-
-    gen_model = OpenAI_M(
-        server_ip=args.llm_host,
-        server_port=args.llm_port
-    )
-
-    with gr.Blocks() as app:
-        gr.Markdown("# MiniCPMV-RAG-PDFQA: Two Vision Language Models Enable End-to-End RAG")
-
-        file_input = gr.File(type="binary", label="Step 1: Upload PDF")
-        file_result = gr.Text(label="Knowledge Base ID")
-        process_button = gr.Button("Process PDF")
-
-        process_button.click(lambda pdf: add_pdf_gradio(pdf, cache_dir=args.cache_dir, model=model, tokenizer=tokenizer),
-                             inputs=file_input, outputs=file_result)
-
-        kb_id_input = gr.Text(label="Knowledge Base ID")
-        query_input = gr.Text(label="Your Question")
-        topk_input = gr.Number(value=5, minimum=1, maximum=10, step=1, label="Number of pages to retrieve")
-        retrieve_button = gr.Button("Retrieve Pages")
-        images_output = gr.Gallery(label="Retrieved Pages")
-
-        retrieve_button.click(lambda kb, query, topk: retrieve_gradio(kb, query, topk, cache_dir=args.cache_dir, model=model, tokenizer=tokenizer),
-                              inputs=[kb_id_input, query_input, topk_input], outputs=images_output)
-
-        button = gr.Button("Answer Question")
-        gen_model_response = gr.Textbox(label="MiniCPM-V-2.6's Answer", lines=10)
-
-        # Use answer_question_stream for streaming response and pass gen_model
-        button.click(lambda images, query: answer_question_stream(images, query, gen_model),
-                     inputs=[images_output, query_input],
-                     outputs=gen_model_response)
-
-        upvote_button = gr.Button("🤗 Upvote")
-        downvote_button = gr.Button("🤣 Downvote")
-
-        upvote_button.click(lambda kb, query: upvote(kb, query, cache_dir=args.cache_dir),
-                            inputs=[kb_id_input, query_input], outputs=None)
-        downvote_button.click(lambda kb, query: downvote(kb, query, cache_dir=args.cache_dir),
-                              inputs=[kb_id_input, query_input], outputs=None)
-
-    app.launch(server_name=args.server_name, server_port=args.server_port)
File without changes
|
File without changes
|