hjxdl 0.2.18__py3-none-any.whl → 0.2.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hdl/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '0.2.18'
- __version_tuple__ = version_tuple = (0, 2, 18)
+ __version__ = version = '0.2.20'
+ __version_tuple__ = version_tuple = (0, 2, 20)
hdl/utils/llm/visrag.py CHANGED
@@ -198,7 +198,7 @@ if __name__ == '__main__':
  )

  with gr.Blocks() as app:
- gr.Markdown("# MiniCPMV-RAG-PDFQA: Two Vision Language Models Enable End-to-End RAG")
+ gr.Markdown("# RAG-PDFQA: Two Vision Language Models Enable End-to-End RAG")

  file_input = gr.File(type="binary", label="Step 1: Upload PDF")
  file_result = gr.Text(label="Knowledge Base ID")
@@ -217,7 +217,7 @@ if __name__ == '__main__':
  inputs=[kb_id_input, query_input, topk_input], outputs=images_output)

  button = gr.Button("Answer Question")
- gen_model_response = gr.Textbox(label="MiniCPM-V-2.6's Answer")
+ gen_model_response = gr.Textbox(label="Answer")

  button.click(lambda images, question: answer_question(images, question, gen_model),
  inputs=[images_output, query_input], outputs=gen_model_response)
hjxdl-0.2.18.dist-info/METADATA → hjxdl-0.2.20.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: hjxdl
- Version: 0.2.18
+ Version: 0.2.20
  Summary: A collection of functions for Jupyter notebooks
  Home-page: https://github.com/huluxiaohuowa/hdl
  Author: Jianxing Hu
hjxdl-0.2.18.dist-info/RECORD → hjxdl-0.2.20.dist-info/RECORD RENAMED
@@ -1,5 +1,5 @@
  hdl/__init__.py,sha256=GffnD0jLJdhkd-vo989v40N90sQbofkayRBwxc6TVhQ,72
- hdl/_version.py,sha256=vv03nZpeD6Mjdm460ETGqf51aVxTkGAfdVq6sFIgFOM,413
+ hdl/_version.py,sha256=a4xkHHagVd1Q5fDYBTP_mD5TwWjJASwGjEaI_TLYo5w,413
  hdl/args/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  hdl/args/loss_args.py,sha256=s7YzSdd7IjD24rZvvOrxLLFqMZQb9YylxKeyelSdrTk,70
  hdl/controllers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -132,14 +132,13 @@ hdl/utils/llm/chatgr.py,sha256=GO2G7g6YybduA5VCUuGjvEsJfC_6L7rycSnPeHMcxyM,2820
  hdl/utils/llm/embs.py,sha256=Tf0FOYrOFZp7qQpEPiSCXzlgyHH0X9HVTUtsup74a9E,7174
  hdl/utils/llm/extract.py,sha256=2sK_WJzmYIc8iuWaM9DA6Nw3_6q1O4lJ5pKpcZo-bBA,6512
  hdl/utils/llm/llama_chat.py,sha256=watcHGOaz-bv3x-yDucYlGk5f8FiqfFhwWogrl334fk,4387
- hdl/utils/llm/ocrrag.py,sha256=rwCfIzgHCxPBaG0JkzJiC-4Ci3NjMx5h5OYVxL1EmSg,9021
  hdl/utils/llm/vis.py,sha256=-6QvxSVzKqxLh_l0aYg2wN2G5HOiQvCpfp-jn9twXw0,16210
- hdl/utils/llm/visrag.py,sha256=vNj4cHsvfC_Vc0eDPKZc-yflLUMGApZGpggjAqAlwS8,9215
+ hdl/utils/llm/visrag.py,sha256=_PuKtmQIXD5bnmXwDWhTLdzOhgC42JiqdMNb1uKA7n8,9190
  hdl/utils/schedulers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  hdl/utils/schedulers/norm_lr.py,sha256=bDwCmdEK-WkgxQMFBiMuchv8Mm7C0-GZJ6usm-PQk14,4461
  hdl/utils/weather/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  hdl/utils/weather/weather.py,sha256=k11o6wM15kF8b9NMlEfrg68ak-SfSYLN3nOOflFUv-I,4381
- hjxdl-0.2.18.dist-info/METADATA,sha256=couEd-CAvz1xZXgTmw1Qd0uiTlBrJcATj-b0v0YRAA8,836
- hjxdl-0.2.18.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
- hjxdl-0.2.18.dist-info/top_level.txt,sha256=-kxwTM5JPhylp06z3zAVO3w6_h7wtBfBo2zgM6YZoTk,4
- hjxdl-0.2.18.dist-info/RECORD,,
+ hjxdl-0.2.20.dist-info/METADATA,sha256=DuluBVeDJWGaS5su2wfbIGZpiifkMoaC6E4h13137G0,836
+ hjxdl-0.2.20.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
+ hjxdl-0.2.20.dist-info/top_level.txt,sha256=-kxwTM5JPhylp06z3zAVO3w6_h7wtBfBo2zgM6YZoTk,4
+ hjxdl-0.2.20.dist-info/RECORD,,
hdl/utils/llm/ocrrag.py DELETED
@@ -1,225 +0,0 @@
- import argparse
- from PIL import Image
- import hashlib
- import torch
- import fitz
- import gradio as gr
- import os
- import numpy as np
- import json
- from transformers import AutoModel, AutoTokenizer
-
- from .chat import OpenAI_M
- from .vis import pilimg_to_base64
-
- def get_image_md5(img: Image.Image):
- img_byte_array = img.tobytes()
- hash_md5 = hashlib.md5()
- hash_md5.update(img_byte_array)
- hex_digest = hash_md5.hexdigest()
- return hex_digest
-
- def calculate_md5_from_binary(binary_data):
- hash_md5 = hashlib.md5()
- hash_md5.update(binary_data)
- return hash_md5.hexdigest()
-
- def add_pdf_gradio(pdf_file_binary, progress=gr.Progress(), cache_dir=None, model=None, tokenizer=None):
- model.eval()
-
- knowledge_base_name = calculate_md5_from_binary(pdf_file_binary)
-
- this_cache_dir = os.path.join(cache_dir, knowledge_base_name)
- os.makedirs(this_cache_dir, exist_ok=True)
-
- with open(os.path.join(this_cache_dir, f"src.pdf"), 'wb') as file:
- file.write(pdf_file_binary)
-
- dpi = 200
- doc = fitz.open("pdf", pdf_file_binary)
-
- reps_list = []
- images = []
- image_md5s = []
-
- for page in progress.tqdm(doc):
- pix = page.get_pixmap(dpi=dpi)
- image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
- image_md5 = get_image_md5(image)
- image_md5s.append(image_md5)
- with torch.no_grad():
- reps = model(text=[''], image=[image], tokenizer=tokenizer).reps
- reps_list.append(reps.squeeze(0).cpu().numpy())
- images.append(image)
-
- for idx in range(len(images)):
- image = images[idx]
- image_md5 = image_md5s[idx]
- cache_image_path = os.path.join(this_cache_dir, f"{image_md5}.png")
- image.save(cache_image_path)
-
- np.save(os.path.join(this_cache_dir, f"reps.npy"), reps_list)
-
- with open(os.path.join(this_cache_dir, f"md5s.txt"), 'w') as f:
- for item in image_md5s:
- f.write(item+'\n')
-
- return knowledge_base_name
-
- def retrieve_gradio(knowledge_base, query, topk, cache_dir=None, model=None, tokenizer=None):
- model.eval()
-
- target_cache_dir = os.path.join(cache_dir, knowledge_base)
-
- if not os.path.exists(target_cache_dir):
- return None
-
- md5s = []
- with open(os.path.join(target_cache_dir, f"md5s.txt"), 'r') as f:
- for line in f:
- md5s.append(line.rstrip('\n'))
-
- doc_reps = np.load(os.path.join(target_cache_dir, f"reps.npy"))
-
- query_with_instruction = "Represent this query for retrieving relevant document: " + query
- with torch.no_grad():
- query_rep = model(text=[query_with_instruction], image=[None], tokenizer=tokenizer).reps.squeeze(0).cpu()
-
- query_md5 = hashlib.md5(query.encode()).hexdigest()
-
- doc_reps_cat = torch.stack([torch.Tensor(i) for i in doc_reps], dim=0)
-
- similarities = torch.matmul(query_rep, doc_reps_cat.T)
-
- topk_values, topk_doc_ids = torch.topk(similarities, k=topk)
-
- images_topk = [Image.open(os.path.join(target_cache_dir, f"{md5s[idx]}.png")) for idx in topk_doc_ids.cpu().numpy()]
-
- with open(os.path.join(target_cache_dir, f"q-{query_md5}.json"), 'w') as f:
- f.write(json.dumps(
- {
- "knowledge_base": knowledge_base,
- "query": query,
- "retrieved_docs": [os.path.join(target_cache_dir, f"{md5s[idx]}.png") for idx in topk_doc_ids.cpu().numpy()]
- }, indent=4, ensure_ascii=False
- ))
-
- return images_topk
-
- def answer_question_stream(images, question, gen_model):
- # Load images from the image paths in images[0]
- pil_images = [Image.open(image[0]).convert('RGB') for image in images]
-
- # Calculate the total size of the new image (for vertical concatenation)
- widths, heights = zip(*(img.size for img in pil_images))
-
- # Assuming vertical concatenation, so width is the max width, height is the sum of heights
- total_width = max(widths)
- total_height = sum(heights)
-
- # Create a new blank image with the total width and height
- new_image = Image.new('RGB', (total_width, total_height))
-
- # Paste each image into the new image
- y_offset = 0
- for img in pil_images:
- new_image.paste(img, (0, y_offset))
- y_offset += img.height # Move the offset down by the height of the image
-
- # Convert the concatenated image to base64
- new_image_base64 = pilimg_to_base64(new_image)
-
- # Stream the answer from the model
- partial_answer_text = ""
- for partial_answer in gen_model.chat(
- prompt=question,
- images=[new_image_base64], # Use the concatenated image
- stream=True # Enable streaming
- ):
- partial_answer_text += partial_answer
- yield gr.update(value=partial_answer_text)
-
- def upvote(knowledge_base, query, cache_dir):
- target_cache_dir = os.path.join(cache_dir, knowledge_base)
- query_md5 = hashlib.md5(query.encode()).hexdigest()
-
- with open(os.path.join(target_cache_dir, f"q-{query_md5}.json"), 'r') as f:
- data = json.loads(f.read())
-
- data["user_preference"] = "upvote"
-
- with open(os.path.join(target_cache_dir, f"q-{query_md5}-withpref.json"), 'w') as f:
- f.write(json.dumps(data, indent=4, ensure_ascii=False))
-
- def downvote(knowledge_base, query, cache_dir):
- target_cache_dir = os.path.join(cache_dir, knowledge_base)
- query_md5 = hashlib.md5(query.encode()).hexdigest()
-
- with open(os.path.join(target_cache_dir, f"q-{query_md5}.json"), 'r') as f:
- data = json.loads(f.read())
-
- data["user_preference"] = "downvote"
-
- with open(os.path.join(target_cache_dir, f"q-{query_md5}-withpref.json"), 'w') as f:
- f.write(json.dumps(data, indent=4, ensure_ascii=False))
-
- if __name__ == '__main__':
- parser = argparse.ArgumentParser(description="MiniCPMV-RAG-PDFQA Script")
- parser.add_argument('--cache-dir', dest='cache_dir', type=str, required=True, help='Cache directory path')
- parser.add_argument('--device', dest='device', type=str, default='cuda:0', help='Device for model inference')
- parser.add_argument('--model-path', dest='model_path', type=str, required=True, help='Path to the embedding model')
- parser.add_argument('--llm-host', dest='llm_host', type=str, default='127.0.0.1', help='LLM server IP address')
- parser.add_argument('--llm-port', dest='llm_port', type=int, default=22299, help='LLM server port')
- parser.add_argument('--server-name', dest='server_name', type=str, default='0.0.0.0', help='Gradio server name')
- parser.add_argument('--server-port', dest='server_port', type=int, default=10077, help='Gradio server port')
-
- args = parser.parse_args()
-
- print("Loading embedding model...")
- tokenizer = AutoTokenizer.from_pretrained(args.model_path, trust_remote_code=True)
- model = AutoModel.from_pretrained(args.model_path, trust_remote_code=True)
- model.to(args.device)
- model.eval()
- print("Embedding model loaded!")
-
- gen_model = OpenAI_M(
- server_ip=args.llm_host,
- server_port=args.llm_port
- )
-
- with gr.Blocks() as app:
- gr.Markdown("# MiniCPMV-RAG-PDFQA: Two Vision Language Models Enable End-to-End RAG")
-
- file_input = gr.File(type="binary", label="Step 1: Upload PDF")
- file_result = gr.Text(label="Knowledge Base ID")
- process_button = gr.Button("Process PDF")
-
- process_button.click(lambda pdf: add_pdf_gradio(pdf, cache_dir=args.cache_dir, model=model, tokenizer=tokenizer),
- inputs=file_input, outputs=file_result)
-
- kb_id_input = gr.Text(label="Knowledge Base ID")
- query_input = gr.Text(label="Your Question")
- topk_input = gr.Number(value=5, minimum=1, maximum=10, step=1, label="Number of pages to retrieve")
- retrieve_button = gr.Button("Retrieve Pages")
- images_output = gr.Gallery(label="Retrieved Pages")
-
- retrieve_button.click(lambda kb, query, topk: retrieve_gradio(kb, query, topk, cache_dir=args.cache_dir, model=model, tokenizer=tokenizer),
- inputs=[kb_id_input, query_input, topk_input], outputs=images_output)
-
- button = gr.Button("Answer Question")
- gen_model_response = gr.Textbox(label="MiniCPM-V-2.6's Answer", lines=10)
-
- # Use answer_question_stream for streaming response and pass gen_model
- button.click(lambda images, query: answer_question_stream(images, query, gen_model),
- inputs=[images_output, query_input],
- outputs=gen_model_response)
-
- upvote_button = gr.Button("🤗 Upvote")
- downvote_button = gr.Button("🤣 Downvote")
-
- upvote_button.click(lambda kb, query: upvote(kb, query, cache_dir=args.cache_dir),
- inputs=[kb_id_input, query_input], outputs=None)
- downvote_button.click(lambda kb, query: downvote(kb, query, cache_dir=args.cache_dir),
- inputs=[kb_id_input, query_input], outputs=None)
-
- app.launch(server_name=args.server_name, server_port=args.server_port)