rag_server 0.0.1-py3-none-any.whl → 0.0.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rag_server/server.py CHANGED
@@ -10,24 +10,32 @@ from rag_server.utils.vector.store import VectorStore
 # Initialize the MCP server
 mcp = FastMCP(name="syne_rag_server", instructions= "You are a helpful assistant that can answer questions about the documents in the session.")
 
-# In-memory sessions: mapping session_id -> VectorStore
-_sessions = {}
-
 @mcp.tool(
     description="Ingest a list of public URLs (PDF, DOCX, DOC) into an ephemeral session. Returns a session_id to use for querying. You can pass in a session_id to ingest into a specific session."
 )
-def ingest_urls(urls: list[str], session: Optional[str] = None) -> str:
+def ingest_urls(urls: list[str], session_id: Optional[str] = None) -> str:
     """
     Ingest a list of public URLs (PDF, DOCX, DOC) into an ephemeral session.
     Returns a session_id to use for querying.
     """
-    session_id = str(uuid.uuid4() if session is None else session)
-    vs = VectorStore()
+    # Determine or generate session ID and init persistent store
+    session_id = session_id or str(uuid.uuid4())
+    vs = VectorStore(session_id)
+    # Extract and chunk each URL, with fallback to URL string on error
+    all_chunks: list[str] = []
     for url in urls:
-        text = extract_text_from_url(url)
-        chunks = chunk_text(text)
-        vs.add(chunks)
-        _sessions[session_id] = vs
+        try:
+            text = extract_text_from_url(url)
+            chunks = chunk_text(text)
+        except Exception:
+            # Fallback: use the URL itself as a chunk
+            chunks = [url]
+        all_chunks.extend(chunks)
+    # Ensure at least one chunk is present
+    if not all_chunks:
+        all_chunks = urls.copy()
+    # Add chunks to the vector store
+    vs.add(all_chunks)
     return session_id
 
 @mcp.tool(
@@ -38,9 +46,8 @@ def query_knowledge(session_id: str, question: str) -> str:
     Query the ingested documents in the given session using RAG.
     Returns a generated answer.
     """
-    vs = _sessions.get(session_id)
-    if not vs:
-        return f"Session ID {session_id} not found. Please call ingest_urls first."
+    # Init persistent store for this session and search
+    vs = VectorStore(session_id)
     docs = vs.search(question)
     context = "\n\n".join(docs)
     return context
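
For orientation, the change above replaces the in-memory `_sessions` dict with a session-scoped persistent `VectorStore`. A minimal sketch of the resulting flow, using only names that appear in this diff (the session ID and sample chunks are illustrative placeholders, and real API keys are required for the embedding calls):

```python
# Minimal sketch of the 0.0.2 session flow, using only names shown in this diff.
# "example-session" and the sample chunks are illustrative placeholders.
from rag_server.utils.vector.store import VectorStore

session_id = "example-session"

# Ingest: chunks are embedded and persisted to ChromaDB under this session_id.
store = VectorStore(session_id)
store.add([
    "FastMCP is a framework for building MCP servers.",
    "RAG retrieves relevant chunks before generation.",
])

# Query: a fresh VectorStore for the same session_id lazily reloads the
# persisted embeddings on its first search.
reader = VectorStore(session_id)
print(reader.search("What does RAG do?", top_k=2))
```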
rag_server/utils/llm.py CHANGED
@@ -1,9 +1,14 @@
 import os
 
 from openai import OpenAI
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
 
 openai_client = OpenAI(
     api_key=os.environ.get("OPENAI_API_KEY"),
     base_url=os.environ.get("OPENAI_API_URL"),
 )
 
+gemini_client = GoogleGenerativeAIEmbeddings(
+    model="models/text-embedding-004",
+    google_api_key=os.environ.get("GEMINI_API_KEY"),
+)
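
The updated module now reads `OPENAI_API_KEY`, `OPENAI_API_URL`, and `GEMINI_API_KEY` from the environment. A small illustrative guard (not part of the package) that could run before importing `rag_server.utils.llm`; `OPENAI_API_URL` may be left unset to use the default OpenAI endpoint:

```python
import os

# Environment variables read by rag_server.utils.llm in 0.0.2 (per the diff above).
# This check is an illustrative addition, not shipped code.
required = ["OPENAI_API_KEY", "GEMINI_API_KEY"]
missing = [name for name in required if not os.environ.get(name)]
if missing:
    raise RuntimeError("Missing environment variables: " + ", ".join(missing))
```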
rag_server/utils/vector/misc.py CHANGED
@@ -1,13 +1,12 @@
 import io
+from typing import List
 
 import docx
 import requests
 import textract
 from PyPDF2 import PdfReader
 from openai.types import CreateEmbeddingResponse
-
-from rag_server.utils.llm import openai_client
-
+from rag_server.utils.llm import openai_client, gemini_client
 
 def extract_text_from_url(url: str) -> str:
     """Download the file at the given URL and extract its text."""
@@ -39,12 +38,32 @@ def chunk_text(text: str, chunk_size: int = 500, overlap: int = 50) -> list[str]
     return chunks
 
 
-def embed_texts(texts: list[str]) -> list[list[float]]:
-    """Embed a list of texts using OpenAI embeddings."""
-    resp: CreateEmbeddingResponse = openai_client.embeddings.create(input=texts, model="text-embedding-ada-002")
-    return [d.embedding for d in resp.data]
+def embed_texts(texts: list[str], retries: int = 3) -> list[list[float]]:
+    """Embed a list of texts using OpenAI embeddings with rate limit handling."""
+    for attempt in range(retries):
+        try:
+            # Try text-embedding-3-small first as it's cheaper and newer
+            resp: CreateEmbeddingResponse = openai_client.embeddings.create(
+                input=texts,
+                model="text-embedding-3-small"
+            )
+            return [d.embedding for d in resp.data]
+        except Exception as e:
+            if "too many requests" in str(e).lower() and attempt < retries - 1:
+                # If rate limited and not last attempt, wait and retry
+                import time
+                time.sleep(2 ** attempt)  # Exponential backoff
+                continue
+            elif attempt == retries - 1:
+                # On last attempt, fall back to Gemini embeddings
+                resp: List[List[float]] = gemini_client.embed_documents(
+                    texts=texts,
+                    task_type="RETRIEVAL_DOCUMENT"
+                )
+                return resp
+            else:
+                raise
 
-def get_embedding(text: str) -> list[float]:
-    """Embed a single text."""
-    resp: CreateEmbeddingResponse = openai_client.embeddings.create(input=text, model="text-embedding-ada-002")
-    return resp.data[0].embedding
+def get_embedding(text: str, retries: int = 3) -> list[float]:
+    """Embed a single text with rate limit handling."""
+    return embed_texts([text], retries=retries)[0]
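
To make the new retry behavior concrete, here is a small self-contained sketch of the same pattern `embed_texts` now uses: exponential backoff on rate-limit errors, then a switch to a secondary embedder on the final attempt. The `primary` and `secondary` callables are illustrative stand-ins, not part of the package:

```python
import time

# Illustrative stand-ins for the two embedding backends wired up in misc.py.
def primary(texts: list[str]) -> list[list[float]]:   # e.g. OpenAI text-embedding-3-small
    raise RuntimeError("429: Too Many Requests")

def secondary(texts: list[str]) -> list[list[float]]:  # e.g. Gemini text-embedding-004
    return [[0.0, 0.0, 0.0, 0.0] for _ in texts]

def embed_with_backoff(texts: list[str], retries: int = 3) -> list[list[float]]:
    """Retry the primary embedder with exponential backoff, then fall back."""
    for attempt in range(retries):
        try:
            return primary(texts)
        except Exception as e:
            if "too many requests" in str(e).lower() and attempt < retries - 1:
                time.sleep(2 ** attempt)  # 1s, 2s, 4s, ...
                continue
            if attempt == retries - 1:
                return secondary(texts)   # last resort: secondary backend
            raise

print(embed_with_backoff(["hello world"]))
```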
rag_server/utils/vector/store.py CHANGED
@@ -1,67 +1,123 @@
 import faiss
 import numpy as np
-from sklearn.feature_extraction.text import TfidfVectorizer
-
-from rag_server.utils.vector.misc import get_embedding, embed_texts
+import uuid
+import chromadb
+from chromadb.config import Settings
+from rag_server.utils.vector.misc import get_embedding
+from concurrent.futures import ThreadPoolExecutor
 
+class EmbeddingAdapter:
+    """Adapter to satisfy ChromaDB EmbeddingFunction interface."""
+    def __call__(self, input: list[str]) -> list[list[float]]:
+        # Use ThreadPoolExecutor for parallel embedding
+        with ThreadPoolExecutor() as executor:
+            embeddings = list(executor.map(get_embedding, input))
+        return embeddings
 
 class VectorStore:
-    """Simple in-memory vector store using FAISS."""
-    def __init__(self, dim: int = 1536):
+    """Persistent vector store using ChromaDB for storage and FAISS for fast retrieval."""
+    def __init__(self, session_id: str, persist_directory: str = "chroma_db", dim: int = 1536):
+        self.session_id = session_id
         self.dim = dim
-        # Use an HNSW approximate nearest neighbor index (no training needed)
+        # Initialize persistent ChromaDB client
+        self.chroma_client = chromadb.PersistentClient(path=persist_directory, settings=Settings())
+        # Create or open the 'chunks' collection with our embedding function
+        self.collection = self.chroma_client.get_or_create_collection(
+            name="chunks",
+            embedding_function=EmbeddingAdapter()
+        )
+        # Initialize FAISS HNSW index for fast approx. kNN
         self.index = faiss.index_factory(dim, "HNSW32")
-        # Configure HNSW parameters for construction and search quality
         try:
            self.index.hnsw.efConstruction = 200
            self.index.hnsw.efSearch = 128
         except AttributeError:
            pass
-        self.texts: list[str] = []
-        # Initialize TF-IDF vectorizer and matrix
-        self.vectorizer = TfidfVectorizer()
-        self.tfidf_matrix = None
+        # Track FAISS IDs and text mapping
+        self.ids: list[str] = []
+        self.id_to_chunk: dict[str, str] = {}
 
     def add(self, chunks: list[str]) -> None:
-        embeddings = embed_texts(chunks)
+        # Generate unique IDs per chunk
+        new_ids = [f"{self.session_id}-{i}-{uuid.uuid4()}" for i in range(len(chunks))]
+
+        # Compute embeddings in parallel using ThreadPoolExecutor
+        with ThreadPoolExecutor() as executor:
+            embeddings = list(executor.map(get_embedding, chunks))
+
+        # Persist to ChromaDB
+        self.collection.add(
+            ids=new_ids,
+            documents=chunks,
+            metadatas=[{"session_id": self.session_id}] * len(chunks),
+            embeddings=embeddings
+        )
+
+        # Add to FAISS index in-memory
         arr = np.array(embeddings, dtype="float32")
         self.index.add(arr)
-        self.texts.extend(chunks)
-        # Update TF-IDF matrix
-        self.tfidf_matrix = self.vectorizer.fit_transform(self.texts)
+
+        # Update ID list and mapping in parallel
+        def update_mapping(args):
+            idx, chunk = args
+            self.id_to_chunk[idx] = chunk
+
+        self.ids.extend(new_ids)
+        with ThreadPoolExecutor() as executor:
+            executor.map(update_mapping, zip(new_ids, chunks))
 
-    def search(self, query: str, top_k: int = 5, alpha: float = 0.5) -> list[str]:
-        """Perform hybrid search combining semantic (FAISS) and lexical (TF-IDF) scores."""
-        # Semantic search via FAISS
+    def search(self, query: str, top_k: int = 5) -> list[str]:
+        # On first search, lazy-load all persisted embeddings for this session into FAISS
+        if self.index.ntotal == 0:
+            # Load this session's embeddings and documents from ChromaDB
+            records = self.collection.get(
+                where={"session_id": self.session_id},
+                include=["embeddings", "documents"],
+            )
+            emb_list = records.get("embeddings", [])
+            # Safely check length of embeddings
+            try:
+                count = len(emb_list)
+            except Exception:
+                count = 0
+            # Convert to array if there are embeddings, otherwise create empty array
+            if count > 0:
+                arr = np.array(emb_list, dtype="float32")
+            else:
+                arr = np.empty((0, self.dim), dtype="float32")
+            if arr.shape[0] > 0:
+                # Populate FAISS index and ID mapping
+                self.index.add(arr)
+                # 'ids' and 'documents' are returned by ChromaDB
+                self.ids = records["ids"]
+                # Update mapping in parallel
+                with ThreadPoolExecutor() as executor:
+                    executor.map(
+                        lambda x: self.id_to_chunk.update({x[0]: x[1]}),
+                        zip(records["ids"], records["documents"])
+                    )
+
+        # If still no data for this session, return empty
+        if self.index.ntotal == 0:
+            return []
+
+        # Compute embedding for the query
         q_emb = np.array([get_embedding(query)], dtype="float32")
+        # Retrieve top_k IDs via FAISS
         D, I = self.index.search(q_emb, top_k)
-        vect_ids = I[0].tolist()
-        vect_scores = [-d for d in D[0]]
-        # Lexical search via TF-IDF
-        if self.tfidf_matrix is None:
-            self.tfidf_matrix = self.vectorizer.fit_transform(self.texts)
-        q_tfidf = self.vectorizer.transform([query])
-        tfidf_scores_all = q_tfidf.dot(self.tfidf_matrix.T).toarray()[0]
-        tfidf_top = np.argsort(-tfidf_scores_all)[:top_k].tolist()
-        # Combine candidate document indices
-        candidate_ids = set(vect_ids + tfidf_top)
-        vect_min = min(vect_scores) if vect_scores else 0.0
-        scores = []
-        for idx in candidate_ids:
-            vs = vect_scores[vect_ids.index(idx)] if idx in vect_ids else vect_min
-            ts = float(tfidf_scores_all[idx])
-            scores.append((idx, vs, ts))
-        # Normalize and blend scores
-        vs_vals = [v for _, v, _ in scores]
-        ts_vals = [t for _, _, t in scores]
-        vmin, vmax = min(vs_vals), max(vs_vals)
-        tmin, tmax = min(ts_vals), max(ts_vals)
-        blended = []
-        for idx, vs, ts in scores:
-            vn = (vs - vmin) / (vmax - vmin) if vmax > vmin else 0.0
-            tn = (ts - tmin) / (tmax - tmin) if tmax > tmin else 0.0
-            combined = alpha * vn + (1 - alpha) * tn
-            blended.append((idx, combined))
-        # Sort by blended score and return top_k chunks
-        top = sorted(blended, key=lambda x: x[1], reverse=True)[:top_k]
-        return [self.texts[i] for i, _ in top]
+        result_ids = [self.ids[i] for i in I[0]]
+
+        # Deduplicate IDs while preserving order to avoid Chroma duplicate errors
+        seen = set()
+        unique_ids = []
+        for rid in result_ids:
+            if rid not in seen:
+                seen.add(rid)
+                unique_ids.append(rid)
+
+        if not unique_ids:
+            return []
+
+        # Fetch documents from ChromaDB
+        results = self.collection.get(ids=unique_ids)
+        return results["documents"]
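
Because every session shares the single persistent `chunks` collection and is separated only by the `session_id` metadata field, the stored chunks for one session can be inspected with the ChromaDB client directly. A minimal sketch (the `chroma_db` path and collection name come from the diff; the session ID is an illustrative placeholder):

```python
import chromadb

# Open the same persistent store that VectorStore writes to (path per the diff).
client = chromadb.PersistentClient(path="chroma_db")
collection = client.get_collection(name="chunks")

# Fetch only the chunks tagged with a given (illustrative) session ID.
records = collection.get(
    where={"session_id": "example-session"},
    include=["documents"],
)
for doc_id, doc in zip(records["ids"], records["documents"]):
    print(doc_id, doc[:80])
```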
rag_server-0.0.2.dist-info/METADATA ADDED
@@ -0,0 +1,64 @@
+Metadata-Version: 2.4
+Name: rag_server
+Version: 0.0.2
+Summary: A FastMCP-based RAG server for dynamic document ingestion
+Project-URL: Homepage, https://github.com/synehq/mcp-hybrid-rag
+Project-URL: Bug Tracker, https://github.com/synehq/mcp-hybrid-rag/issues
+Author-email: SyneHQ <human@synehq.com>
+License-Expression: MIT
+Requires-Python: >=3.12
+Requires-Dist: chromadb
+Requires-Dist: faiss-cpu
+Requires-Dist: fastmcp
+Requires-Dist: langchain-google-genai
+Requires-Dist: numpy
+Requires-Dist: openai
+Requires-Dist: pypdf2
+Requires-Dist: python-docx
+Requires-Dist: requests
+Requires-Dist: six
+Requires-Dist: textract-py3
+Provides-Extra: dev
+Requires-Dist: pytest; extra == 'dev'
+Description-Content-Type: text/markdown
+
+# RAG Server
+
+A FastMCP-based Retrieval-Augmented Generation server for dynamically ingesting public documents and querying them on-the-fly. This server implements the Model Context Protocol (MCP) to enable seamless integration between AI models and external data sources.
+
+## Features
+
+- Document ingestion from public URLs (PDF, DOCX, DOC)
+- Hybrid vector search using both OpenAI and Google Gemini embeddings
+- Session-based context management via MCP
+- Automatic fallback and retry mechanisms for embedding generation
+- Support for chunking and overlapping text segments
+
+## Installation
+
+```
+uv pip install -e .
+```
+
+## Tools
+
+The server exposes the following MCP tools defined in `src/rag_server/server.py`:
+
+### `ingest_urls`
+
+**Description**: Ingest a list of public URLs (PDF, DOCX, DOC) into an ephemeral session. Returns a `session_id` for querying. You can pass an existing `session_id` to ingest into a specific session.
+
+**Signature**: `ingest_urls(urls: list[str], session_id: Optional[str] = None) -> str`
+
+- `urls`: List of public document URLs to ingest.
+- `session_id` _(optional)_: Existing session identifier.
+
+### `query_knowledge`
+
+**Description**: Query the ingested documents in the given session using RAG. Returns a generated answer.
+
+**Signature**: `query_knowledge(session_id: str, question: str) -> str`
+
+- `session_id`: Session identifier where documents were ingested.
+- `question`: The question to query against ingested documents.
+
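
As a usage illustration of the two tools documented above, here is a hypothetical client-side call sequence. It assumes the FastMCP 2.x `Client` API (`fastmcp.Client`, `call_tool`) and a locally available `rag_server/server.py`; the document URL and session ID are placeholders:

```python
import asyncio
from fastmcp import Client

async def main() -> None:
    # Spawn/attach to the server defined in rag_server/server.py (assumed path).
    async with Client("rag_server/server.py") as client:
        # Ingest into an explicitly chosen session so the result needs no parsing.
        await client.call_tool(
            "ingest_urls",
            {"urls": ["https://example.com/whitepaper.pdf"], "session_id": "demo-session"},
        )
        answer = await client.call_tool(
            "query_knowledge",
            {"session_id": "demo-session", "question": "What is the whitepaper about?"},
        )
        print(answer)

asyncio.run(main())
```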
rag_server-0.0.2.dist-info/RECORD ADDED
@@ -0,0 +1,11 @@
+rag_server/__init__.py,sha256=KNZ1bD9ZGfyZwlv91Ueeega_1lsRDLs2fYQDgNbBdtc,212
+rag_server/server.py,sha256=UuZuvMhAF29IMNVaeDnEpet3zLnY-udUAfYaZFnpe78,2011
+rag_server/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+rag_server/utils/llm.py,sha256=b-6p1hL7nBeia5nsUp7--jtSDuTt9taqjkejq6SwyLk,362
+rag_server/utils/vector/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+rag_server/utils/vector/misc.py,sha256=l7gi4CLaJyPfPKxMlE1F-hpHkQH59pIy-Opjf3XNZQg,2616
+rag_server/utils/vector/store.py,sha256=WpHsnTpVoEIey5kGMck4AijmF5fGvto8Kz87VhvsSBY,4921
+rag_server-0.0.2.dist-info/METADATA,sha256=pLPyWgoQheEmofws63ihN5OxZ_O5eCVVTgJzIKpzcqo,2185
+rag_server-0.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+rag_server-0.0.2.dist-info/entry_points.txt,sha256=sWdH-o-5Mge0fcw28bZ-lAMdlVq3PJOsXTZSzZy_ndc,76
+rag_server-0.0.2.dist-info/RECORD,,
rag_server-0.0.1.dist-info/METADATA REMOVED
@@ -1,46 +0,0 @@
-Metadata-Version: 2.4
-Name: rag_server
-Version: 0.0.1
-Summary: A FastMCP-based RAG server for dynamic document ingestion
-Project-URL: Homepage, https://github.com/synehq/mcp-hybrid-rag
-Project-URL: Bug Tracker, https://github.com/synehq/mcp-hybrid-rag/issues
-Author-email: SyneHQ <human@synehq.com>
-License-Expression: MIT
-Requires-Python: >=3.10
-Requires-Dist: faiss-cpu
-Requires-Dist: fastmcp
-Requires-Dist: numpy
-Requires-Dist: openai
-Requires-Dist: pypdf2
-Requires-Dist: python-docx
-Requires-Dist: requests
-Requires-Dist: scikit-learn
-Requires-Dist: textract
-Description-Content-Type: text/markdown
-
-# RAG Server
-
-A FastMCP-based Retrieval-Augmented Generation server for dynamically ingesting public documents and querying them on-the-fly.
-
-## Installation
-
-```bash
-pip install -r requirements.txt
-```
-
-Ensure you set your OpenAI API key:
-
-```bash
-export OPENAI_API_KEY=your_key_here
-```
-
-## Running the server
-
-```bash
-python -m rag_server.server
-```
-
-## API Tools
-
-- ingest_urls(urls: List[str], session_id: Optional[str]) -> session_id
-- query_knowledge(session_id: str, question: str) -> answer
rag_server-0.0.1.dist-info/RECORD REMOVED
@@ -1,11 +0,0 @@
-rag_server/__init__.py,sha256=KNZ1bD9ZGfyZwlv91Ueeega_1lsRDLs2fYQDgNbBdtc,212
-rag_server/server.py,sha256=75IV2Ggowcx30LEtFy1stRbJGodsgvsD-CKObhbCeg4,1699
-rag_server/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-rag_server/utils/llm.py,sha256=yEmxoRQ750LGu8ufWu38RoX0umBRWw8q0GQxzFmqAy8,158
-rag_server/utils/vector/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-rag_server/utils/vector/misc.py,sha256=NbBRzU6RBc4A5Pu0cl76dutuZZfj_abwuAkKjM-LD6k,1768
-rag_server/utils/vector/store.py,sha256=b7GtzjnXuqDVpQHMZ4Otms4wIY4zB0y6aLBCu58DSNE,2929
-rag_server-0.0.1.dist-info/METADATA,sha256=i9DFzwVljGdfABtAK21WFGM9JoxE0hdAPloJlpE0za0,1104
-rag_server-0.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-rag_server-0.0.1.dist-info/entry_points.txt,sha256=sWdH-o-5Mge0fcw28bZ-lAMdlVq3PJOsXTZSzZy_ndc,76
-rag_server-0.0.1.dist-info/RECORD,,