rag_server 0.0.1__tar.gz → 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,3 +17,5 @@
 
  *.pyw
 
+
+ chroma_db
@@ -0,0 +1,64 @@
+ Metadata-Version: 2.4
+ Name: rag_server
+ Version: 0.0.2
+ Summary: A FastMCP-based RAG server for dynamic document ingestion
+ Project-URL: Homepage, https://github.com/synehq/mcp-hybrid-rag
+ Project-URL: Bug Tracker, https://github.com/synehq/mcp-hybrid-rag/issues
+ Author-email: SyneHQ <human@synehq.com>
+ License-Expression: MIT
+ Requires-Python: >=3.12
+ Requires-Dist: chromadb
+ Requires-Dist: faiss-cpu
+ Requires-Dist: fastmcp
+ Requires-Dist: langchain-google-genai
+ Requires-Dist: numpy
+ Requires-Dist: openai
+ Requires-Dist: pypdf2
+ Requires-Dist: python-docx
+ Requires-Dist: requests
+ Requires-Dist: six
+ Requires-Dist: textract-py3
+ Provides-Extra: dev
+ Requires-Dist: pytest; extra == 'dev'
+ Description-Content-Type: text/markdown
+
+ # RAG Server
+
+ A FastMCP-based Retrieval-Augmented Generation server for dynamically ingesting public documents and querying them on the fly. This server implements the Model Context Protocol (MCP) to enable seamless integration between AI models and external data sources.
+
+ ## Features
+
+ - Document ingestion from public URLs (PDF, DOCX, DOC)
+ - Hybrid vector search using both OpenAI and Google Gemini embeddings
+ - Session-based context management via MCP
+ - Automatic fallback and retry mechanisms for embedding generation
+ - Support for chunking and overlapping text segments
+
+ ## Installation
+
+ ```
+ uv pip install -e .
+ ```
+
+ ## Tools
+
+ The server exposes the following MCP tools, defined in `src/rag_server/server.py`:
+
+ ### `ingest_urls`
+
+ **Description**: Ingest a list of public URLs (PDF, DOCX, DOC) into an ephemeral session. Returns a `session_id` for querying. You can pass an existing `session_id` to ingest into a specific session.
+
+ **Signature**: `ingest_urls(urls: list[str], session_id: Optional[str] = None) -> str`
+
+ - `urls`: List of public document URLs to ingest.
+ - `session_id` _(optional)_: Existing session identifier.
+
+ ### `query_knowledge`
+
+ **Description**: Query the ingested documents in the given session using RAG. Returns a generated answer.
+
+ **Signature**: `query_knowledge(session_id: str, question: str) -> str`
+
+ - `session_id`: Session identifier where documents were ingested.
+ - `question`: The question to ask against the ingested documents.
+
@@ -0,0 +1,40 @@
+ # RAG Server
+
+ A FastMCP-based Retrieval-Augmented Generation server for dynamically ingesting public documents and querying them on the fly. This server implements the Model Context Protocol (MCP) to enable seamless integration between AI models and external data sources.
+
+ ## Features
+
+ - Document ingestion from public URLs (PDF, DOCX, DOC)
+ - Hybrid vector search using both OpenAI and Google Gemini embeddings
+ - Session-based context management via MCP
+ - Automatic fallback and retry mechanisms for embedding generation
+ - Support for chunking and overlapping text segments
+
+ ## Installation
+
+ ```
+ uv pip install -e .
+ ```
+
+ ## Tools
+
+ The server exposes the following MCP tools, defined in `src/rag_server/server.py`:
+
+ ### `ingest_urls`
+
+ **Description**: Ingest a list of public URLs (PDF, DOCX, DOC) into an ephemeral session. Returns a `session_id` for querying. You can pass an existing `session_id` to ingest into a specific session.
+
+ **Signature**: `ingest_urls(urls: list[str], session_id: Optional[str] = None) -> str`
+
+ - `urls`: List of public document URLs to ingest.
+ - `session_id` _(optional)_: Existing session identifier.
+
+ ### `query_knowledge`
+
+ **Description**: Query the ingested documents in the given session using RAG. Returns a generated answer.
+
+ **Signature**: `query_knowledge(session_id: str, question: str) -> str`
+
+ - `session_id`: Session identifier where documents were ingested.
+ - `question`: The question to ask against the ingested documents.
+
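
The README documents the tool signatures but no end-to-end call. As a sketch (the URL is a placeholder, and the tools are invoked as plain Python functions, the same way the package's own benchmark script later in this diff does):

```
from rag_server.server import ingest_urls, query_knowledge

# Hypothetical URL, for illustration only
session_id = ingest_urls(["https://example.com/annual-report.pdf"])
answer = query_knowledge(session_id, "What is this document about?")
print(answer)
```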
@@ -0,0 +1,46 @@
+ # benchmark.py
+ """
+ A simple benchmark for measuring ingest_urls and query_knowledge performance.
+ """
+ import time
+ from rag_server.server import ingest_urls, query_knowledge
+
+
+ def benchmark_ingest(urls, repeats: int = 3):
+     """Benchmark the ingest_urls function."""
+     times = []
+     for i in range(repeats):
+         start = time.perf_counter()
+         sid = ingest_urls(urls)
+         elapsed = time.perf_counter() - start
+         times.append(elapsed)
+         print(f"Run {i+1}/{repeats} ingest: {elapsed:.3f}s (session_id={sid})")
+     avg = sum(times) / len(times)
+     print(f"Average ingest time: {avg:.3f}s\n")
+     return sid
+
+
+ def benchmark_query(session_id, question: str, repeats: int = 3):
+     """Benchmark the query_knowledge function."""
+     times = []
+     for i in range(repeats):
+         start = time.perf_counter()
+         resp = query_knowledge(session_id, question)
+         elapsed = time.perf_counter() - start
+         times.append(elapsed)
+         print(f"Run {i+1}/{repeats} query: {elapsed:.3f}s (response length={len(resp)})")
+     avg = sum(times) / len(times)
+     print(f"Average query time: {avg:.3f}s\n")
+
+
+ def main():
+     # Sample URLs to benchmark (adjust as needed)
+     sample_urls = ["https://b.zmtcdn.com/investor-relations/681c57ac651e6e8f54c263ffbfc1e0b9_1737369246.pdf"] * 2
+     print("--- Benchmarking ingest_urls ---")
+     sid = benchmark_ingest(sample_urls, repeats=5)
+     print("--- Benchmarking query_knowledge ---")
+     benchmark_query(sid, "What is this document about?", repeats=5)
+
+
+ if __name__ == "__main__":
+     main()
@@ -1,21 +1,30 @@
  [project]
  name = "rag_server"
- version = "0.0.1"
+ version = "0.0.2"
  description = "A FastMCP-based RAG server for dynamic document ingestion"
  readme = "README.md"
  license = "MIT"
- requires-python = ">=3.10"
+ requires-python = ">=3.12"
  dependencies = [
      "fastmcp",
      "openai",
      "requests",
      "numpy",
      "faiss-cpu",
-     "PyPDF2",
+     "PyPDF2",
      "python-docx",
-     "textract", "scikit-learn"
+     "six",
+     "langchain_google_genai",
+     "textract-py3",
+     "chromadb",
  ]
 
+ [project.optional-dependencies]
+ dev = [
+     "pytest",
+ ]
+
+
  [project.urls]
  "Homepage" = "https://github.com/synehq/mcp-hybrid-rag"
  "Bug Tracker" = "https://github.com/synehq/mcp-hybrid-rag/issues"
@@ -39,4 +48,4 @@ line-length = 120
  docstring-code-format = true
 
  [tool.ruff.lint]
- select = ["E", "F", "I"]
+ select = ["E", "F", "I"]
@@ -0,0 +1 @@
+ 3.12
@@ -10,24 +10,32 @@ from rag_server.utils.vector.store import VectorStore
  # Initialize the MCP server
  mcp = FastMCP(name="syne_rag_server", instructions="You are a helpful assistant that can answer questions about the documents in the session.")
 
- # In-memory sessions: mapping session_id -> VectorStore
- _sessions = {}
-
  @mcp.tool(
      description="Ingest a list of public URLs (PDF, DOCX, DOC) into an ephemeral session. Returns a session_id to use for querying. You can pass in a session_id to ingest into a specific session."
  )
- def ingest_urls(urls: list[str], session: Optional[str] = None) -> str:
+ def ingest_urls(urls: list[str], session_id: Optional[str] = None) -> str:
      """
      Ingest a list of public URLs (PDF, DOCX, DOC) into an ephemeral session.
      Returns a session_id to use for querying.
      """
-     session_id = str(uuid.uuid4() if session is None else session)
-     vs = VectorStore()
+     # Determine or generate session ID and init persistent store
+     session_id = session_id or str(uuid.uuid4())
+     vs = VectorStore(session_id)
+     # Extract and chunk each URL, with fallback to URL string on error
+     all_chunks: list[str] = []
      for url in urls:
-         text = extract_text_from_url(url)
-         chunks = chunk_text(text)
-         vs.add(chunks)
-     _sessions[session_id] = vs
+         try:
+             text = extract_text_from_url(url)
+             chunks = chunk_text(text)
+         except Exception:
+             # Fallback: use the URL itself as a chunk
+             chunks = [url]
+         all_chunks.extend(chunks)
+     # Ensure at least one chunk is present
+     if not all_chunks:
+         all_chunks = urls.copy()
+     # Add chunks to the vector store
+     vs.add(all_chunks)
      return session_id
 
  @mcp.tool(
@@ -38,9 +46,8 @@ def query_knowledge(session_id: str, question: str) -> str:
      """
      Query the ingested documents in the given session using RAG.
      Returns a generated answer.
      """
-     vs = _sessions.get(session_id)
-     if not vs:
-         return f"Session ID {session_id} not found. Please call ingest_urls first."
+     # Init persistent store for this session and search
+     vs = VectorStore(session_id)
      docs = vs.search(question)
      context = "\n\n".join(docs)
      return context
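
The net effect of these two hunks: sessions no longer live in a process-local `_sessions` dict but in an on-disk ChromaDB directory, so a `session_id` survives restarts, and an unknown `session_id` now yields an empty string rather than a "not found" message (the test file at the end of this diff asserts exactly that). A sketch of what the persistence enables (the session id is illustrative):

```
from rag_server.server import ingest_urls, query_knowledge

sid = ingest_urls(["https://example.com/guide.pdf"], session_id="docs-demo")
# Even in a fresh process, the same id resolves against the persisted store:
print(query_knowledge("docs-demo", "Summarize the guide."))
```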
@@ -0,0 +1,14 @@
+ import os
+
+ from openai import OpenAI
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
+
+ openai_client = OpenAI(
+     api_key=os.environ.get("OPENAI_API_KEY"),
+     base_url=os.environ.get("OPENAI_API_URL"),
+ )
+
+ gemini_client = GoogleGenerativeAIEmbeddings(
+     model="models/text-embedding-004",
+     google_api_key=os.environ.get("GEMINI_API_KEY"),
+ )
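
Both clients are configured purely from environment variables. A sketch of the assumed setup (the key values are placeholders; `OPENAI_API_URL` may be left unset to use the default endpoint):

```
import os

os.environ["OPENAI_API_KEY"] = "sk-..."    # placeholder
os.environ["GEMINI_API_KEY"] = "AIza..."   # placeholder
# Optional: point the OpenAI client at a compatible proxy
# os.environ["OPENAI_API_URL"] = "https://api.openai.com/v1"

from rag_server.utils.llm import openai_client, gemini_client
```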
@@ -0,0 +1,69 @@
+ import io
+ import tempfile
+ import time
+ from typing import List
+
+ import docx
+ import requests
+ import textract
+ from PyPDF2 import PdfReader
+ from openai.types import CreateEmbeddingResponse
+ from rag_server.utils.llm import openai_client, gemini_client
+
+ def extract_text_from_url(url: str) -> str:
+     """Download the file at the given URL and extract its text."""
+     resp = requests.get(url)
+     resp.raise_for_status()
+     content = resp.content
+     ext = url.split(".")[-1].lower()
+     if ext == "pdf":
+         reader = PdfReader(io.BytesIO(content))
+         return "\n".join(p.extract_text() or "" for p in reader.pages)
+     elif ext == "docx":
+         doc = docx.Document(io.BytesIO(content))
+         return "\n".join(p.text for p in doc.paragraphs)
+     elif ext == "doc":
+         # textract expects a file path rather than a file object, so spool
+         # the bytes to a named temporary file first
+         with tempfile.NamedTemporaryFile(suffix=".doc") as tmp:
+             tmp.write(content)
+             tmp.flush()
+             return textract.process(tmp.name, extension="doc").decode("utf-8", errors="ignore")
+     else:
+         return content.decode("utf-8", errors="ignore")
+
+
+ def chunk_text(text: str, chunk_size: int = 500, overlap: int = 50) -> list[str]:
+     """Split text into chunks of approximately chunk_size words with overlap."""
+     words = text.split()
+     chunks = []
+     start = 0
+     while start < len(words):
+         chunk = " ".join(words[start: start + chunk_size])
+         chunks.append(chunk)
+         start += chunk_size - overlap
+     return chunks
+
+
+ def embed_texts(texts: list[str], retries: int = 3) -> list[list[float]]:
+     """Embed a list of texts using OpenAI embeddings with rate limit handling."""
+     for attempt in range(retries):
+         try:
+             # Try text-embedding-3-small first as it's cheaper and newer
+             resp: CreateEmbeddingResponse = openai_client.embeddings.create(
+                 input=texts,
+                 model="text-embedding-3-small"
+             )
+             return [d.embedding for d in resp.data]
+         except Exception as e:
+             if "too many requests" in str(e).lower() and attempt < retries - 1:
+                 # If rate limited and not on the last attempt, wait and retry
+                 time.sleep(2 ** attempt)  # Exponential backoff
+                 continue
+             elif attempt == retries - 1:
+                 # On the last attempt, fall back to Gemini embeddings
+                 embeddings: List[List[float]] = gemini_client.embed_documents(
+                     texts=texts,
+                     task_type="RETRIEVAL_DOCUMENT"
+                 )
+                 return embeddings
+             else:
+                 raise
+
+ def get_embedding(text: str, retries: int = 3) -> list[float]:
+     """Embed a single text with rate limit handling."""
+     return embed_texts([text], retries=retries)[0]
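
To make the overlap arithmetic in `chunk_text` concrete: each window advances by `chunk_size - overlap` words, so consecutive chunks share `overlap` words. A small worked example with toy numbers:

```
from rag_server.utils.vector.misc import chunk_text

text = " ".join(str(n) for n in range(10))  # "0 1 2 ... 9"
print(chunk_text(text, chunk_size=5, overlap=2))
# ['0 1 2 3 4', '3 4 5 6 7', '6 7 8 9', '9']
```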
@@ -0,0 +1,123 @@
+ import faiss
+ import numpy as np
+ import uuid
+ import chromadb
+ from chromadb.config import Settings
+ from rag_server.utils.vector.misc import get_embedding
+ from concurrent.futures import ThreadPoolExecutor
+
+ class EmbeddingAdapter:
+     """Adapter to satisfy ChromaDB's EmbeddingFunction interface."""
+     def __call__(self, input: list[str]) -> list[list[float]]:
+         # Use a ThreadPoolExecutor for parallel embedding
+         with ThreadPoolExecutor() as executor:
+             embeddings = list(executor.map(get_embedding, input))
+         return embeddings
+
+ class VectorStore:
+     """Persistent vector store using ChromaDB for storage and FAISS for fast retrieval."""
+     def __init__(self, session_id: str, persist_directory: str = "chroma_db", dim: int = 1536):
+         self.session_id = session_id
+         self.dim = dim
+         # Initialize persistent ChromaDB client
+         self.chroma_client = chromadb.PersistentClient(path=persist_directory, settings=Settings())
+         # Create or open the 'chunks' collection with our embedding function
+         self.collection = self.chroma_client.get_or_create_collection(
+             name="chunks",
+             embedding_function=EmbeddingAdapter()
+         )
+         # Initialize FAISS HNSW index for fast approximate kNN
+         self.index = faiss.index_factory(dim, "HNSW32")
+         try:
+             self.index.hnsw.efConstruction = 200
+             self.index.hnsw.efSearch = 128
+         except AttributeError:
+             pass
+         # Track FAISS IDs and text mapping
+         self.ids: list[str] = []
+         self.id_to_chunk: dict[str, str] = {}
+
+     def add(self, chunks: list[str]) -> None:
+         # Generate unique IDs per chunk
+         new_ids = [f"{self.session_id}-{i}-{uuid.uuid4()}" for i in range(len(chunks))]
+
+         # Compute embeddings in parallel using a ThreadPoolExecutor
+         with ThreadPoolExecutor() as executor:
+             embeddings = list(executor.map(get_embedding, chunks))
+
+         # Persist to ChromaDB
+         self.collection.add(
+             ids=new_ids,
+             documents=chunks,
+             metadatas=[{"session_id": self.session_id}] * len(chunks),
+             embeddings=embeddings
+         )
+
+         # Add to the in-memory FAISS index
+         arr = np.array(embeddings, dtype="float32")
+         self.index.add(arr)
+
+         # Update the ID list and text mapping
+         self.ids.extend(new_ids)
+         for rid, chunk in zip(new_ids, chunks):
+             self.id_to_chunk[rid] = chunk
+
+     def search(self, query: str, top_k: int = 5) -> list[str]:
+         # On first search, lazy-load all persisted embeddings for this session into FAISS
+         if self.index.ntotal == 0:
+             # Load this session's embeddings and documents from ChromaDB
+             records = self.collection.get(
+                 where={"session_id": self.session_id},
+                 include=["embeddings", "documents"],
+             )
+             emb_list = records.get("embeddings", [])
+             # Safely check the number of stored embeddings
+             try:
+                 count = len(emb_list)
+             except Exception:
+                 count = 0
+             # Convert to an array if there are embeddings, otherwise create an empty array
+             if count > 0:
+                 arr = np.array(emb_list, dtype="float32")
+             else:
+                 arr = np.empty((0, self.dim), dtype="float32")
+             if arr.shape[0] > 0:
+                 # Populate the FAISS index and ID mapping
+                 self.index.add(arr)
+                 # 'ids' and 'documents' are returned by ChromaDB
+                 self.ids = records["ids"]
+                 for rid, doc in zip(records["ids"], records["documents"]):
+                     self.id_to_chunk[rid] = doc
+
+         # If there is still no data for this session, return empty
+         if self.index.ntotal == 0:
+             return []
+
+         # Compute the embedding for the query
+         q_emb = np.array([get_embedding(query)], dtype="float32")
+         # Retrieve top_k IDs via FAISS; FAISS pads results with -1 when fewer
+         # than top_k vectors exist, so filter those out
+         D, I = self.index.search(q_emb, top_k)
+         result_ids = [self.ids[i] for i in I[0] if i != -1]
+
+         # Deduplicate IDs while preserving order to avoid Chroma duplicate errors
+         seen = set()
+         unique_ids = []
+         for rid in result_ids:
+             if rid not in seen:
+                 seen.add(rid)
+                 unique_ids.append(rid)
+
+         if not unique_ids:
+             return []
+
+         # Fetch the documents from ChromaDB
+         results = self.collection.get(ids=unique_ids)
+         return results["documents"]
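
`VectorStore` can also be exercised directly; a sketch, assuming the API keys above are configured (the session id and texts are illustrative). Note that the default `dim=1536` matches OpenAI's text-embedding-3-small, while the Gemini fallback returns 768-dimensional vectors that the FAISS index would reject:

```
from rag_server.utils.vector.store import VectorStore

vs = VectorStore("demo-session")
vs.add(["ChromaDB persists chunks and embeddings on disk.",
        "FAISS serves approximate nearest-neighbour search in memory."])
print(vs.search("what stores the data?", top_k=1))
```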
@@ -0,0 +1,31 @@
+ import pytest
+ from rag_server.server import ingest_urls, query_knowledge
+
+ def test_ingest_urls():
+     # Test with a single URL
+     session_id = ingest_urls(["https://example.com/test.pdf"])
+     assert isinstance(session_id, str)
+     assert len(session_id) > 0
+
+     # Test with multiple URLs and an explicit session_id
+     explicit_id = "test-session"
+     returned_id = ingest_urls(
+         ["https://example.com/doc1.pdf", "https://example.com/doc2.docx"],
+         session_id=explicit_id
+     )
+     assert returned_id == explicit_id
+
+ def test_query_knowledge():
+     # First ingest some test documents
+     session_id = ingest_urls(["https://example.com/test.pdf"])
+
+     # Test querying the knowledge base
+     response = query_knowledge(session_id, "What is this document about?")
+     assert isinstance(response, str)
+     assert len(response) > 0
+
+     # Test with a non-existent session
+     response = query_knowledge("non-existent-session", "test question")
+     assert isinstance(response, str)
+     # Should return an empty context when no documents are found
+     assert response == ""