hjxdl 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hdl/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '0.2.14'
- __version_tuple__ = version_tuple = (0, 2, 14)
+ __version__ = version = '0.2.16'
+ __version_tuple__ = version_tuple = (0, 2, 16)
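
For a quick sanity check that the bump landed, the version can be read from the module or from the wheel's metadata; a minimal sketch, assuming the 0.2.16 wheel is installed:

    from importlib.metadata import version
    from hdl._version import __version__, version_tuple

    print(__version__)        # '0.2.16', from the module above
    print(version_tuple)      # (0, 2, 16)
    print(version("hjxdl"))   # same string, read from the installed metadata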
hdl/utils/llm/ocrrag.py ADDED
@@ -0,0 +1,224 @@
+ import argparse
+ from PIL import Image
+ import hashlib
+ import torch
+ import fitz
+ import gradio as gr
+ import os
+ import numpy as np
+ import json
+ from transformers import AutoModel, AutoTokenizer
+
+ from .chat import OpenAI_M
+ from .vis import pilimg_to_base64
+
+ def get_image_md5(img: Image.Image):
+     img_byte_array = img.tobytes()
+     hash_md5 = hashlib.md5()
+     hash_md5.update(img_byte_array)
+     hex_digest = hash_md5.hexdigest()
+     return hex_digest
+
+ def calculate_md5_from_binary(binary_data):
+     hash_md5 = hashlib.md5()
+     hash_md5.update(binary_data)
+     return hash_md5.hexdigest()
+
+ def add_pdf_gradio(pdf_file_binary, progress=gr.Progress(), cache_dir=None, model=None, tokenizer=None):
+     model.eval()
+
+     knowledge_base_name = calculate_md5_from_binary(pdf_file_binary)
+
+     this_cache_dir = os.path.join(cache_dir, knowledge_base_name)
+     os.makedirs(this_cache_dir, exist_ok=True)
+
+     with open(os.path.join(this_cache_dir, "src.pdf"), 'wb') as file:
+         file.write(pdf_file_binary)
+
+     dpi = 200
+     doc = fitz.open("pdf", pdf_file_binary)
+
+     reps_list = []
+     images = []
+     image_md5s = []
+
+     for page in progress.tqdm(doc):
+         pix = page.get_pixmap(dpi=dpi)
+         image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
+         image_md5 = get_image_md5(image)
+         image_md5s.append(image_md5)
+         with torch.no_grad():
+             reps = model(text=[''], image=[image], tokenizer=tokenizer).reps
+         reps_list.append(reps.squeeze(0).cpu().numpy())
+         images.append(image)
+
+     for idx in range(len(images)):
+         image = images[idx]
+         image_md5 = image_md5s[idx]
+         cache_image_path = os.path.join(this_cache_dir, f"{image_md5}.png")
+         image.save(cache_image_path)
+
+     np.save(os.path.join(this_cache_dir, "reps.npy"), reps_list)
+
+     with open(os.path.join(this_cache_dir, "md5s.txt"), 'w') as f:
+         for item in image_md5s:
+             f.write(item + '\n')
+
+     return knowledge_base_name
+
+ def retrieve_gradio(knowledge_base, query, topk, cache_dir=None, model=None, tokenizer=None):
+     model.eval()
+
+     target_cache_dir = os.path.join(cache_dir, knowledge_base)
+
+     if not os.path.exists(target_cache_dir):
+         return None
+
+     md5s = []
+     with open(os.path.join(target_cache_dir, "md5s.txt"), 'r') as f:
+         for line in f:
+             md5s.append(line.rstrip('\n'))
+
+     doc_reps = np.load(os.path.join(target_cache_dir, "reps.npy"))
+
+     query_with_instruction = "Represent this query for retrieving relevant document: " + query
+     with torch.no_grad():
+         query_rep = model(text=[query_with_instruction], image=[None], tokenizer=tokenizer).reps.squeeze(0).cpu()
+
+     query_md5 = hashlib.md5(query.encode()).hexdigest()
+
+     doc_reps_cat = torch.stack([torch.Tensor(i) for i in doc_reps], dim=0)
+
+     similarities = torch.matmul(query_rep, doc_reps_cat.T)
+
+     # gr.Number delivers a float; torch.topk requires an integer k
+     topk_values, topk_doc_ids = torch.topk(similarities, k=int(topk))
+
+     images_topk = [Image.open(os.path.join(target_cache_dir, f"{md5s[idx]}.png")) for idx in topk_doc_ids.cpu().numpy()]
+
+     with open(os.path.join(target_cache_dir, f"q-{query_md5}.json"), 'w') as f:
+         f.write(json.dumps(
+             {
+                 "knowledge_base": knowledge_base,
+                 "query": query,
+                 "retrieved_docs": [os.path.join(target_cache_dir, f"{md5s[idx]}.png") for idx in topk_doc_ids.cpu().numpy()]
+             }, indent=4, ensure_ascii=False
+         ))
+
+     return images_topk
+
+ def answer_question_stream(images, question, gen_model):
+     # Load images from the gallery items; each item is a (path, caption) tuple
+     pil_images = [Image.open(image[0]).convert('RGB') for image in images]
+
+     # Calculate the total size of the new image (for vertical concatenation)
+     widths, heights = zip(*(img.size for img in pil_images))
+
+     # Vertical concatenation: width is the max width, height is the sum of heights
+     total_width = max(widths)
+     total_height = sum(heights)
+
+     # Create a new blank image with the total width and height
+     new_image = Image.new('RGB', (total_width, total_height))
+
+     # Paste each image into the new image
+     y_offset = 0
+     for img in pil_images:
+         new_image.paste(img, (0, y_offset))
+         y_offset += img.height  # Move the offset down by the height of the image
+
+     # Convert the concatenated image to base64
+     new_image_base64 = pilimg_to_base64(new_image)
+
+     # Call the model with the base64-encoded concatenated image and stream=True
+     for partial_answer in gen_model.chat(
+         prompt=question,
+         images=[new_image_base64],  # Use the concatenated image
+         stream=True  # Enable streaming
+     ):
+         # Yield the partial answer as it comes in
+         yield partial_answer  # Stream the output to Gradio
+
+ def upvote(knowledge_base, query, cache_dir):
+     target_cache_dir = os.path.join(cache_dir, knowledge_base)
+     query_md5 = hashlib.md5(query.encode()).hexdigest()
+
+     with open(os.path.join(target_cache_dir, f"q-{query_md5}.json"), 'r') as f:
+         data = json.loads(f.read())
+
+     data["user_preference"] = "upvote"
+
+     with open(os.path.join(target_cache_dir, f"q-{query_md5}-withpref.json"), 'w') as f:
+         f.write(json.dumps(data, indent=4, ensure_ascii=False))
+
+ def downvote(knowledge_base, query, cache_dir):
+     target_cache_dir = os.path.join(cache_dir, knowledge_base)
+     query_md5 = hashlib.md5(query.encode()).hexdigest()
+
+     with open(os.path.join(target_cache_dir, f"q-{query_md5}.json"), 'r') as f:
+         data = json.loads(f.read())
+
+     data["user_preference"] = "downvote"
+
+     with open(os.path.join(target_cache_dir, f"q-{query_md5}-withpref.json"), 'w') as f:
+         f.write(json.dumps(data, indent=4, ensure_ascii=False))
+
+ if __name__ == '__main__':
+     parser = argparse.ArgumentParser(description="MiniCPMV-RAG-PDFQA Script")
+     parser.add_argument('--cache-dir', dest='cache_dir', type=str, required=True, help='Cache directory path')
+     parser.add_argument('--device', dest='device', type=str, default='cuda:0', help='Device for model inference')
+     parser.add_argument('--model-path', dest='model_path', type=str, required=True, help='Path to the embedding model')
+     parser.add_argument('--llm-host', dest='llm_host', type=str, default='127.0.0.1', help='LLM server IP address')
+     parser.add_argument('--llm-port', dest='llm_port', type=int, default=22299, help='LLM server port')
+     parser.add_argument('--server-name', dest='server_name', type=str, default='0.0.0.0', help='Gradio server name')
+     parser.add_argument('--server-port', dest='server_port', type=int, default=10077, help='Gradio server port')
+
+     args = parser.parse_args()
+
+     print("Loading embedding model...")
+     tokenizer = AutoTokenizer.from_pretrained(args.model_path, trust_remote_code=True)
+     model = AutoModel.from_pretrained(args.model_path, trust_remote_code=True)
+     model.to(args.device)
+     model.eval()
+     print("Embedding model loaded!")
+
+     gen_model = OpenAI_M(
+         server_ip=args.llm_host,
+         server_port=args.llm_port
+     )
+
+     with gr.Blocks() as app:
+         gr.Markdown("# MiniCPMV-RAG-PDFQA: Two Vision Language Models Enable End-to-End RAG")
+
+         file_input = gr.File(type="binary", label="Step 1: Upload PDF")
+         file_result = gr.Text(label="Knowledge Base ID")
+         process_button = gr.Button("Process PDF")
+
+         process_button.click(lambda pdf: add_pdf_gradio(pdf, cache_dir=args.cache_dir, model=model, tokenizer=tokenizer),
+                              inputs=file_input, outputs=file_result)
+
+         kb_id_input = gr.Text(label="Knowledge Base ID")
+         query_input = gr.Text(label="Your Question")
+         topk_input = gr.Number(value=5, minimum=1, maximum=10, step=1, label="Number of pages to retrieve")
+         retrieve_button = gr.Button("Retrieve Pages")
+         images_output = gr.Gallery(label="Retrieved Pages")
+
+         retrieve_button.click(lambda kb, query, topk: retrieve_gradio(kb, query, topk, cache_dir=args.cache_dir, model=model, tokenizer=tokenizer),
+                               inputs=[kb_id_input, query_input, topk_input], outputs=images_output)
+
+         button = gr.Button("Answer Question")
+         gen_model_response = gr.Textbox(label="MiniCPM-V-2.6's Answer", lines=10)
+
+         # Bind gen_model in a local generator function so Gradio still sees a
+         # generator and streams the output; a plain lambda would hide it
+         def stream_answer(images, question):
+             yield from answer_question_stream(images, question, gen_model)
+
+         button.click(stream_answer,
+                      inputs=[images_output, query_input],
+                      outputs=gen_model_response)
+
+         upvote_button = gr.Button("🤗 Upvote")
+         downvote_button = gr.Button("🤣 Downvote")
+
+         upvote_button.click(lambda kb, query: upvote(kb, query, cache_dir=args.cache_dir),
+                             inputs=[kb_id_input, query_input], outputs=None)
+         downvote_button.click(lambda kb, query: downvote(kb, query, cache_dir=args.cache_dir),
+                               inputs=[kb_id_input, query_input], outputs=None)
+
+     app.launch(server_name=args.server_name, server_port=args.server_port)
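
Two details of the new ocrrag.py are worth calling out: the knowledge-base ID is nothing more than the MD5 of the uploaded PDF bytes, and retrieval is a single dot product between the query embedding and the stack of cached page embeddings, followed by top-k. The sketch below replays that math on toy data (the byte string and the 2304-dim embedding width are made up for illustration, not the model's actual size):

    import hashlib
    import torch

    pdf_bytes = b"%PDF-1.4 ..."                    # stand-in for the uploaded binary
    kb_id = hashlib.md5(pdf_bytes).hexdigest()     # same bytes -> same cache dir

    query_rep = torch.randn(2304)                  # one query embedding
    doc_reps_cat = torch.randn(12, 2304)           # 12 cached page embeddings
    similarities = torch.matmul(query_rep, doc_reps_cat.T)   # shape (12,)
    values, doc_ids = torch.topk(similarities, k=3)          # indices into md5s / page PNGs

Because the file uses relative imports, it must be launched as a module, e.g. python -m hdl.utils.llm.ocrrag --cache-dir ./cache --model-path <embedding-model> (both paths here are placeholders).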
hdl/utils/llm/vis.py CHANGED
@@ -15,6 +15,7 @@ from redis.commands.search.indexDefinition import IndexDefinition, IndexType
  from hdl.jupyfuncs.show.pbar import tqdm
  from redis.commands.search.query import Query

+
  from ..database_tools.connect import conn_redis


@@ -89,6 +90,7 @@ def imgfile_to_base64(img_dir: str):

      return img_base64

+
  def imgbase64_to_pilimg(img_base64: str):
      """Converts a base64 encoded image to a PIL image.

@@ -107,6 +109,24 @@ def imgbase64_to_pilimg(img_base64: str):
      return img_pil


+ def pilimg_to_base64(pilimg):
+     """Converts a PIL image to base64 format.
+
+     Args:
+         pilimg (PIL.Image): The PIL image to be converted.
+
+     Returns:
+         str: Base64 encoded image string.
+     """
+     buffered = BytesIO()
+     pilimg.save(buffered, format="PNG")
+     image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
+     img_format = 'png'
+     mime_type = f"image/{img_format}"
+     img_base64 = f"data:{mime_type};base64,{image_base64}"
+     return img_base64
+
+
  class ImgHandler:
      def __init__(
          self,
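
The new pilimg_to_base64 starts from an in-memory PIL image and returns a full data URI, which is the form the chat code above passes in its images list. A quick sanity check of the output shape (a sketch, assuming the helper is importable from hdl.utils.llm.vis):

    from PIL import Image
    from hdl.utils.llm.vis import pilimg_to_base64

    img = Image.new('RGB', (64, 64), 'red')   # toy image
    b64 = pilimg_to_base64(img)
    assert b64.startswith("data:image/png;base64,")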
hdl/utils/llm/visrag.py CHANGED
@@ -12,7 +12,7 @@ import json
  from transformers import AutoModel, AutoTokenizer

  from .chat import OpenAI_M
- from .vis import imgfile_to_base64
+ from .vis import pilimg_to_base64

  def get_image_md5(img: Image.Image):
      img_byte_array = img.tobytes()
@@ -116,14 +116,35 @@ def retrieve_gradio(knowledge_base, query, topk, cache_dir=None, model=None, tok
      # return image_base64

  def answer_question(images, question, gen_model):
-     # Convert images to base64
-     # images_base64 = [convert_image_to_base64(Image.open(image[0]).convert('RGB')) for image in images]
-     images_base64 = [imgfile_to_base64(image[0]) for image in images]
+     # Load images from the gallery items; each item is a (path, caption) tuple
+     pil_images = [Image.open(image[0]).convert('RGB') for image in images]

-     # Pass base64-encoded images to gen_model.chat
+     # Calculate the total size of the new image (for vertical concatenation)
+     widths, heights = zip(*(img.size for img in pil_images))
+
+     # Vertical concatenation: width is the max width, height is the sum of heights
+     total_width = max(widths)
+     total_height = sum(heights)
+
+     # Create a new blank image with the total width and height
+     new_image = Image.new('RGB', (total_width, total_height))
+
+     # Paste each image into the new image
+     y_offset = 0
+     for img in pil_images:
+         new_image.paste(img, (0, y_offset))
+         y_offset += img.height  # Move the offset down by the height of the image
+
+     # Optionally save the final concatenated image (for debugging)
+     # new_image.save('concatenated_image.png')
+
+     # Convert the concatenated image to base64
+     new_image_base64 = pilimg_to_base64(new_image)
+
+     # Call the model with the base64-encoded concatenated image
      answer = gen_model.chat(
          prompt=question,
-         images=images_base64,  # Use the base64 images
+         images=[new_image_base64],  # Use the concatenated image
          stream=False
      )
      return answer
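
This concatenation block in answer_question is now duplicated verbatim in the new ocrrag.py; if a third copy ever appears, a shared helper next to pilimg_to_base64 would be the natural home. A possible extraction (hypothetical, not part of this release):

    from PIL import Image

    def vconcat_images(pil_images):
        """Stack PIL images top-to-bottom on one canvas, left-aligned."""
        canvas = Image.new('RGB', (max(i.width for i in pil_images),
                                   sum(i.height for i in pil_images)))
        y = 0
        for img in pil_images:
            canvas.paste(img, (0, y))
            y += img.height
        return canvas

Note that Image.new defaults to a black fill, so narrower pages leave a black right margin; passing 'white' as the fill may read better for document pages.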
{hjxdl-0.2.14.dist-info → hjxdl-0.2.16.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: hjxdl
- Version: 0.2.14
+ Version: 0.2.16
  Summary: A collection of functions for Jupyter notebooks
  Home-page: https://github.com/huluxiaohuowa/hdl
  Author: Jianxing Hu
{hjxdl-0.2.14.dist-info → hjxdl-0.2.16.dist-info}/RECORD CHANGED
@@ -1,5 +1,5 @@
  hdl/__init__.py,sha256=GffnD0jLJdhkd-vo989v40N90sQbofkayRBwxc6TVhQ,72
- hdl/_version.py,sha256=KYrSahOPivF0LOfn4qq6iTibWNx1Db_9urh-NXAGe9E,413
+ hdl/_version.py,sha256=24Q7k0pOfSN3Vkvs8-MWQxeJqcvAZ3JvN_YWtflaEyU,413
  hdl/args/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  hdl/args/loss_args.py,sha256=s7YzSdd7IjD24rZvvOrxLLFqMZQb9YylxKeyelSdrTk,70
  hdl/controllers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -132,13 +132,14 @@ hdl/utils/llm/chatgr.py,sha256=GO2G7g6YybduA5VCUuGjvEsJfC_6L7rycSnPeHMcxyM,2820
  hdl/utils/llm/embs.py,sha256=Tf0FOYrOFZp7qQpEPiSCXzlgyHH0X9HVTUtsup74a9E,7174
  hdl/utils/llm/extract.py,sha256=2sK_WJzmYIc8iuWaM9DA6Nw3_6q1O4lJ5pKpcZo-bBA,6512
  hdl/utils/llm/llama_chat.py,sha256=watcHGOaz-bv3x-yDucYlGk5f8FiqfFhwWogrl334fk,4387
- hdl/utils/llm/vis.py,sha256=2pI0439GWi_BEVfQJtY29Y72FkUa8jEvBeqMlwy7xkc,15716
- hdl/utils/llm/visrag.py,sha256=8IsY4e3AlzmyfR1bTQhHQq-Z5uxLHiN9kPu-b_byTKw,8411
+ hdl/utils/llm/ocrrag.py,sha256=AxzoSZ9AHBJihTwxllprlukVYb0JI83GgvQDKHcJl-4,8982
+ hdl/utils/llm/vis.py,sha256=-6QvxSVzKqxLh_l0aYg2wN2G5HOiQvCpfp-jn9twXw0,16210
+ hdl/utils/llm/visrag.py,sha256=vNj4cHsvfC_Vc0eDPKZc-yflLUMGApZGpggjAqAlwS8,9215
  hdl/utils/schedulers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  hdl/utils/schedulers/norm_lr.py,sha256=bDwCmdEK-WkgxQMFBiMuchv8Mm7C0-GZJ6usm-PQk14,4461
  hdl/utils/weather/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  hdl/utils/weather/weather.py,sha256=k11o6wM15kF8b9NMlEfrg68ak-SfSYLN3nOOflFUv-I,4381
- hjxdl-0.2.14.dist-info/METADATA,sha256=_IIRb9CIkJLiuQ42cVgk9pJOevHNnqOoWEetESOYX2I,836
- hjxdl-0.2.14.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
- hjxdl-0.2.14.dist-info/top_level.txt,sha256=-kxwTM5JPhylp06z3zAVO3w6_h7wtBfBo2zgM6YZoTk,4
- hjxdl-0.2.14.dist-info/RECORD,,
+ hjxdl-0.2.16.dist-info/METADATA,sha256=5jUlljbBjcD-EWi_2s4qYK3G2r7uoCSL4t7WinXNwmE,836
+ hjxdl-0.2.16.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
+ hjxdl-0.2.16.dist-info/top_level.txt,sha256=-kxwTM5JPhylp06z3zAVO3w6_h7wtBfBo2zgM6YZoTk,4
+ hjxdl-0.2.16.dist-info/RECORD,,