rag_server 0.0.1-py3-none-any.whl → 0.0.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rag_server/server.py CHANGED
@@ -10,24 +10,32 @@ from rag_server.utils.vector.store import VectorStore
 # Initialize the MCP server
 mcp = FastMCP(name="syne_rag_server", instructions= "You are a helpful assistant that can answer questions about the documents in the session.")
 
-# In-memory sessions: mapping session_id -> VectorStore
-_sessions = {}
-
 @mcp.tool(
     description="Ingest a list of public URLs (PDF, DOCX, DOC) into an ephemeral session. Returns a session_id to use for querying. You can pass in a session_id to ingest into a specific session."
 )
-def ingest_urls(urls: list[str], session: Optional[str] = None) -> str:
+def ingest_urls(urls: list[str], session_id: Optional[str] = None) -> str:
     """
     Ingest a list of public URLs (PDF, DOCX, DOC) into an ephemeral session.
     Returns a session_id to use for querying.
     """
-    session_id = str(uuid.uuid4() if session is None else session)
-    vs = VectorStore()
+    # Determine or generate session ID and init persistent store
+    session_id = session_id or str(uuid.uuid4())
+    vs = VectorStore(session_id)
+    # Extract and chunk each URL, with fallback to URL string on error
+    all_chunks: list[str] = []
     for url in urls:
-        text = extract_text_from_url(url)
-        chunks = chunk_text(text)
-        vs.add(chunks)
-        _sessions[session_id] = vs
+        try:
+            text = extract_text_from_url(url)
+            chunks = chunk_text(text)
+        except Exception:
+            # Fallback: use the URL itself as a chunk
+            chunks = [url]
+        all_chunks.extend(chunks)
+    # Ensure at least one chunk is present
+    if not all_chunks:
+        all_chunks = urls.copy()
+    # Add chunks to the vector store
+    vs.add(all_chunks)
     return session_id
 
 @mcp.tool(
@@ -38,9 +46,8 @@ def query_knowledge(session_id: str, question: str) -> str:
     Query the ingested documents in the given session using RAG.
     Returns a generated answer.
     """
-    vs = _sessions.get(session_id)
-    if not vs:
-        return f"Session ID {session_id} not found. Please call ingest_urls first."
+    # Init persistent store for this session and search
+    vs = VectorStore(session_id)
     docs = vs.search(question)
     context = "\n\n".join(docs)
     return context
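
For orientation, the change above replaces the in-memory `_sessions` dict with a session-scoped persistent `VectorStore`. A minimal sketch of the resulting flow, using only names that appear in this diff (the session ID and sample chunks are illustrative placeholders, and real API keys are required for the embedding calls):

```python
# Minimal sketch of the 0.0.2 session flow, using only names shown in this diff.
# "example-session" and the sample chunks are illustrative placeholders.
from rag_server.utils.vector.store import VectorStore

session_id = "example-session"

# Ingest: chunks are embedded and persisted to ChromaDB under this session_id.
store = VectorStore(session_id)
store.add([
    "FastMCP is a framework for building MCP servers.",
    "RAG retrieves relevant chunks before generation.",
])

# Query: a fresh VectorStore for the same session_id lazily reloads the
# persisted embeddings on its first search.
reader = VectorStore(session_id)
print(reader.search("What does RAG do?", top_k=2))
```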
rag_server/utils/llm.py CHANGED
@@ -1,9 +1,14 @@
 import os
 
 from openai import OpenAI
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
 
 openai_client = OpenAI(
     api_key=os.environ.get("OPENAI_API_KEY"),
     base_url=os.environ.get("OPENAI_API_URL"),
 )
 
+gemini_client = GoogleGenerativeAIEmbeddings(
+    model="models/text-embedding-004",
+    google_api_key=os.environ.get("GEMINI_API_KEY"),
+)
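
The updated module now reads `OPENAI_API_KEY`, `OPENAI_API_URL`, and `GEMINI_API_KEY` from the environment. A small illustrative guard (not part of the package) that could run before importing `rag_server.utils.llm`; `OPENAI_API_URL` may be left unset to use the default OpenAI endpoint:

```python
import os

# Environment variables read by rag_server.utils.llm in 0.0.2 (per the diff above).
# This check is an illustrative addition, not shipped code.
required = ["OPENAI_API_KEY", "GEMINI_API_KEY"]
missing = [name for name in required if not os.environ.get(name)]
if missing:
    raise RuntimeError("Missing environment variables: " + ", ".join(missing))
```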
rag_server/utils/vector/misc.py CHANGED
@@ -1,13 +1,12 @@
 import io
+from typing import List
 
 import docx
 import requests
 import textract
 from PyPDF2 import PdfReader
 from openai.types import CreateEmbeddingResponse
-
-from rag_server.utils.llm import openai_client
-
+from rag_server.utils.llm import openai_client, gemini_client
 
 def extract_text_from_url(url: str) -> str:
     """Download the file at the given URL and extract its text."""
@@ -39,12 +38,32 @@ def chunk_text(text: str, chunk_size: int = 500, overlap: int = 50) -> list[str]
     return chunks
 
 
-def embed_texts(texts: list[str]) -> list[list[float]]:
-    """Embed a list of texts using OpenAI embeddings."""
-    resp: CreateEmbeddingResponse = openai_client.embeddings.create(input=texts, model="text-embedding-ada-002")
-    return [d.embedding for d in resp.data]
+def embed_texts(texts: list[str], retries: int = 3) -> list[list[float]]:
+    """Embed a list of texts using OpenAI embeddings with rate limit handling."""
+    for attempt in range(retries):
+        try:
+            # Try text-embedding-3-small first as it's cheaper and newer
+            resp: CreateEmbeddingResponse = openai_client.embeddings.create(
+                input=texts,
+                model="text-embedding-3-small"
+            )
+            return [d.embedding for d in resp.data]
+        except Exception as e:
+            if "too many requests" in str(e).lower() and attempt < retries - 1:
+                # If rate limited and not last attempt, wait and retry
+                import time
+                time.sleep(2 ** attempt)  # Exponential backoff
+                continue
+            elif attempt == retries - 1:
+                # On last attempt, fall back to Gemini embeddings
+                resp: List[List[float]] = gemini_client.embed_documents(
+                    texts=texts,
+                    task_type="RETRIEVAL_DOCUMENT"
+                )
+                return resp
+            else:
+                raise
 
-def get_embedding(text: str) -> list[float]:
-    """Embed a single text."""
-    resp: CreateEmbeddingResponse = openai_client.embeddings.create(input=text, model="text-embedding-ada-002")
-    return resp.data[0].embedding
+def get_embedding(text: str, retries: int = 3) -> list[float]:
+    """Embed a single text with rate limit handling."""
+    return embed_texts([text], retries=retries)[0]
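
To make the new retry behavior concrete, here is a small self-contained sketch of the same pattern `embed_texts` now uses: exponential backoff on rate-limit errors, then a switch to a secondary embedder on the final attempt. The `primary` and `secondary` callables are illustrative stand-ins, not part of the package:

```python
import time

# Illustrative stand-ins for the two embedding backends wired up in misc.py.
def primary(texts: list[str]) -> list[list[float]]:   # e.g. OpenAI text-embedding-3-small
    raise RuntimeError("429: Too Many Requests")

def secondary(texts: list[str]) -> list[list[float]]:  # e.g. Gemini text-embedding-004
    return [[0.0, 0.0, 0.0, 0.0] for _ in texts]

def embed_with_backoff(texts: list[str], retries: int = 3) -> list[list[float]]:
    """Retry the primary embedder with exponential backoff, then fall back."""
    for attempt in range(retries):
        try:
            return primary(texts)
        except Exception as e:
            if "too many requests" in str(e).lower() and attempt < retries - 1:
                time.sleep(2 ** attempt)  # 1s, 2s, 4s, ...
                continue
            if attempt == retries - 1:
                return secondary(texts)   # last resort: secondary backend
            raise

print(embed_with_backoff(["hello world"]))
```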
rag_server/utils/vector/store.py CHANGED
@@ -1,67 +1,123 @@
 import faiss
 import numpy as np
-from sklearn.feature_extraction.text import TfidfVectorizer
-
-from rag_server.utils.vector.misc import get_embedding, embed_texts
+import uuid
+import chromadb
+from chromadb.config import Settings
+from rag_server.utils.vector.misc import get_embedding
+from concurrent.futures import ThreadPoolExecutor
 
+class EmbeddingAdapter:
+    """Adapter to satisfy ChromaDB EmbeddingFunction interface."""
+    def __call__(self, input: list[str]) -> list[list[float]]:
+        # Use ThreadPoolExecutor for parallel embedding
+        with ThreadPoolExecutor() as executor:
+            embeddings = list(executor.map(get_embedding, input))
+        return embeddings
 
 class VectorStore:
-    """Simple in-memory vector store using FAISS."""
-    def __init__(self, dim: int = 1536):
+    """Persistent vector store using ChromaDB for storage and FAISS for fast retrieval."""
+    def __init__(self, session_id: str, persist_directory: str = "chroma_db", dim: int = 1536):
+        self.session_id = session_id
         self.dim = dim
-        # Use an HNSW approximate nearest neighbor index (no training needed)
+        # Initialize persistent ChromaDB client
+        self.chroma_client = chromadb.PersistentClient(path=persist_directory, settings=Settings())
+        # Create or open the 'chunks' collection with our embedding function
+        self.collection = self.chroma_client.get_or_create_collection(
+            name="chunks",
+            embedding_function=EmbeddingAdapter()
+        )
+        # Initialize FAISS HNSW index for fast approx. kNN
         self.index = faiss.index_factory(dim, "HNSW32")
-        # Configure HNSW parameters for construction and search quality
         try:
            self.index.hnsw.efConstruction = 200
            self.index.hnsw.efSearch = 128
         except AttributeError:
            pass
-        self.texts: list[str] = []
-        # Initialize TF-IDF vectorizer and matrix
-        self.vectorizer = TfidfVectorizer()
-        self.tfidf_matrix = None
+        # Track FAISS IDs and text mapping
+        self.ids: list[str] = []
+        self.id_to_chunk: dict[str, str] = {}
 
     def add(self, chunks: list[str]) -> None:
-        embeddings = embed_texts(chunks)
+        # Generate unique IDs per chunk
+        new_ids = [f"{self.session_id}-{i}-{uuid.uuid4()}" for i in range(len(chunks))]
+
+        # Compute embeddings in parallel using ThreadPoolExecutor
+        with ThreadPoolExecutor() as executor:
+            embeddings = list(executor.map(get_embedding, chunks))
+
+        # Persist to ChromaDB
+        self.collection.add(
+            ids=new_ids,
+            documents=chunks,
+            metadatas=[{"session_id": self.session_id}] * len(chunks),
+            embeddings=embeddings
+        )
+
+        # Add to FAISS index in-memory
         arr = np.array(embeddings, dtype="float32")
         self.index.add(arr)
-        self.texts.extend(chunks)
-        # Update TF-IDF matrix
-        self.tfidf_matrix = self.vectorizer.fit_transform(self.texts)
+
+        # Update ID list and mapping in parallel
+        def update_mapping(args):
+            idx, chunk = args
+            self.id_to_chunk[idx] = chunk
+
+        self.ids.extend(new_ids)
+        with ThreadPoolExecutor() as executor:
+            executor.map(update_mapping, zip(new_ids, chunks))
 
-    def search(self, query: str, top_k: int = 5, alpha: float = 0.5) -> list[str]:
-        """Perform hybrid search combining semantic (FAISS) and lexical (TF-IDF) scores."""
-        # Semantic search via FAISS
+    def search(self, query: str, top_k: int = 5) -> list[str]:
+        # On first search, lazy-load all persisted embeddings for this session into FAISS
+        if self.index.ntotal == 0:
+            # Load this session's embeddings and documents from ChromaDB
+            records = self.collection.get(
+                where={"session_id": self.session_id},
+                include=["embeddings", "documents"],
+            )
+            emb_list = records.get("embeddings", [])
+            # Safely check length of embeddings
+            try:
+                count = len(emb_list)
+            except Exception:
+                count = 0
+            # Convert to array if there are embeddings, otherwise create empty array
+            if count > 0:
+                arr = np.array(emb_list, dtype="float32")
+            else:
+                arr = np.empty((0, self.dim), dtype="float32")
+            if arr.shape[0] > 0:
+                # Populate FAISS index and ID mapping
+                self.index.add(arr)
+                # 'ids' and 'documents' are returned by ChromaDB
+                self.ids = records["ids"]
+                # Update mapping in parallel
+                with ThreadPoolExecutor() as executor:
+                    executor.map(
+                        lambda x: self.id_to_chunk.update({x[0]: x[1]}),
+                        zip(records["ids"], records["documents"])
+                    )
+
+        # If still no data for this session, return empty
+        if self.index.ntotal == 0:
+            return []
+
+        # Compute embedding for the query
         q_emb = np.array([get_embedding(query)], dtype="float32")
+        # Retrieve top_k IDs via FAISS
         D, I = self.index.search(q_emb, top_k)
-        vect_ids = I[0].tolist()
-        vect_scores = [-d for d in D[0]]
-        # Lexical search via TF-IDF
-        if self.tfidf_matrix is None:
-            self.tfidf_matrix = self.vectorizer.fit_transform(self.texts)
-        q_tfidf = self.vectorizer.transform([query])
-        tfidf_scores_all = q_tfidf.dot(self.tfidf_matrix.T).toarray()[0]
-        tfidf_top = np.argsort(-tfidf_scores_all)[:top_k].tolist()
-        # Combine candidate document indices
-        candidate_ids = set(vect_ids + tfidf_top)
-        vect_min = min(vect_scores) if vect_scores else 0.0
-        scores = []
-        for idx in candidate_ids:
-            vs = vect_scores[vect_ids.index(idx)] if idx in vect_ids else vect_min
-            ts = float(tfidf_scores_all[idx])
-            scores.append((idx, vs, ts))
-        # Normalize and blend scores
-        vs_vals = [v for _, v, _ in scores]
-        ts_vals = [t for _, _, t in scores]
-        vmin, vmax = min(vs_vals), max(vs_vals)
-        tmin, tmax = min(ts_vals), max(ts_vals)
-        blended = []
-        for idx, vs, ts in scores:
-            vn = (vs - vmin) / (vmax - vmin) if vmax > vmin else 0.0
-            tn = (ts - tmin) / (tmax - tmin) if tmax > tmin else 0.0
-            combined = alpha * vn + (1 - alpha) * tn
-            blended.append((idx, combined))
-        # Sort by blended score and return top_k chunks
-        top = sorted(blended, key=lambda x: x[1], reverse=True)[:top_k]
-        return [self.texts[i] for i, _ in top]
+        result_ids = [self.ids[i] for i in I[0]]
+
+        # Deduplicate IDs while preserving order to avoid Chroma duplicate errors
+        seen = set()
+        unique_ids = []
+        for rid in result_ids:
+            if rid not in seen:
+                seen.add(rid)
+                unique_ids.append(rid)
+
+        if not unique_ids:
+            return []
+
+        # Fetch documents from ChromaDB
+        results = self.collection.get(ids=unique_ids)
+        return results["documents"]
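
Because every session shares the single persistent `chunks` collection and is separated only by the `session_id` metadata field, the stored chunks for one session can be inspected with the ChromaDB client directly. A minimal sketch (the `chroma_db` path and collection name come from the diff; the session ID is an illustrative placeholder):

```python
import chromadb

# Open the same persistent store that VectorStore writes to (path per the diff).
client = chromadb.PersistentClient(path="chroma_db")
collection = client.get_collection(name="chunks")

# Fetch only the chunks tagged with a given (illustrative) session ID.
records = collection.get(
    where={"session_id": "example-session"},
    include=["documents"],
)
for doc_id, doc in zip(records["ids"], records["documents"]):
    print(doc_id, doc[:80])
```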
rag_server-0.0.2.dist-info/METADATA ADDED
@@ -0,0 +1,64 @@
+Metadata-Version: 2.4
+Name: rag_server
+Version: 0.0.2
+Summary: A FastMCP-based RAG server for dynamic document ingestion
+Project-URL: Homepage, https://github.com/synehq/mcp-hybrid-rag
+Project-URL: Bug Tracker, https://github.com/synehq/mcp-hybrid-rag/issues
+Author-email: SyneHQ <human@synehq.com>
+License-Expression: MIT
+Requires-Python: >=3.12
+Requires-Dist: chromadb
+Requires-Dist: faiss-cpu
+Requires-Dist: fastmcp
+Requires-Dist: langchain-google-genai
+Requires-Dist: numpy
+Requires-Dist: openai
+Requires-Dist: pypdf2
+Requires-Dist: python-docx
+Requires-Dist: requests
+Requires-Dist: six
+Requires-Dist: textract-py3
+Provides-Extra: dev
+Requires-Dist: pytest; extra == 'dev'
+Description-Content-Type: text/markdown
+
+# RAG Server
+
+A FastMCP-based Retrieval-Augmented Generation server for dynamically ingesting public documents and querying them on-the-fly. This server implements the Model Context Protocol (MCP) to enable seamless integration between AI models and external data sources.
+
+## Features
+
+- Document ingestion from public URLs (PDF, DOCX, DOC)
+- Hybrid vector search using both OpenAI and Google Gemini embeddings
+- Session-based context management via MCP
+- Automatic fallback and retry mechanisms for embedding generation
+- Support for chunking and overlapping text segments
+
+## Installation
+
+```
+uv pip install -e .
+```
+
+## Tools
+
+The server exposes the following MCP tools defined in `src/rag_server/server.py`:
+
+### `ingest_urls`
+
+**Description**: Ingest a list of public URLs (PDF, DOCX, DOC) into an ephemeral session. Returns a `session_id` for querying. You can pass an existing `session_id` to ingest into a specific session.
+
+**Signature**: `ingest_urls(urls: list[str], session_id: Optional[str] = None) -> str`
+
+- `urls`: List of public document URLs to ingest.
+- `session_id` _(optional)_: Existing session identifier.
+
+### `query_knowledge`
+
+**Description**: Query the ingested documents in the given session using RAG. Returns a generated answer.
+
+**Signature**: `query_knowledge(session_id: str, question: str) -> str`
+
+- `session_id`: Session identifier where documents were ingested.
+- `question`: The question to query against ingested documents.
+
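
As a usage illustration of the two tools documented above, here is a hypothetical client-side call sequence. It assumes the FastMCP 2.x `Client` API (`fastmcp.Client`, `call_tool`) and a locally available `rag_server/server.py`; the document URL and session ID are placeholders:

```python
import asyncio
from fastmcp import Client

async def main() -> None:
    # Spawn/attach to the server defined in rag_server/server.py (assumed path).
    async with Client("rag_server/server.py") as client:
        # Ingest into an explicitly chosen session so the result needs no parsing.
        await client.call_tool(
            "ingest_urls",
            {"urls": ["https://example.com/whitepaper.pdf"], "session_id": "demo-session"},
        )
        answer = await client.call_tool(
            "query_knowledge",
            {"session_id": "demo-session", "question": "What is the whitepaper about?"},
        )
        print(answer)

asyncio.run(main())
```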
rag_server-0.0.2.dist-info/RECORD ADDED
@@ -0,0 +1,11 @@
+rag_server/__init__.py,sha256=KNZ1bD9ZGfyZwlv91Ueeega_1lsRDLs2fYQDgNbBdtc,212
+rag_server/server.py,sha256=UuZuvMhAF29IMNVaeDnEpet3zLnY-udUAfYaZFnpe78,2011
+rag_server/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+rag_server/utils/llm.py,sha256=b-6p1hL7nBeia5nsUp7--jtSDuTt9taqjkejq6SwyLk,362
+rag_server/utils/vector/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+rag_server/utils/vector/misc.py,sha256=l7gi4CLaJyPfPKxMlE1F-hpHkQH59pIy-Opjf3XNZQg,2616
+rag_server/utils/vector/store.py,sha256=WpHsnTpVoEIey5kGMck4AijmF5fGvto8Kz87VhvsSBY,4921
+rag_server-0.0.2.dist-info/METADATA,sha256=pLPyWgoQheEmofws63ihN5OxZ_O5eCVVTgJzIKpzcqo,2185
+rag_server-0.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+rag_server-0.0.2.dist-info/entry_points.txt,sha256=sWdH-o-5Mge0fcw28bZ-lAMdlVq3PJOsXTZSzZy_ndc,76
+rag_server-0.0.2.dist-info/RECORD,,
rag_server-0.0.1.dist-info/METADATA REMOVED
@@ -1,46 +0,0 @@
-Metadata-Version: 2.4
-Name: rag_server
-Version: 0.0.1
-Summary: A FastMCP-based RAG server for dynamic document ingestion
-Project-URL: Homepage, https://github.com/synehq/mcp-hybrid-rag
-Project-URL: Bug Tracker, https://github.com/synehq/mcp-hybrid-rag/issues
-Author-email: SyneHQ <human@synehq.com>
-License-Expression: MIT
-Requires-Python: >=3.10
-Requires-Dist: faiss-cpu
-Requires-Dist: fastmcp
-Requires-Dist: numpy
-Requires-Dist: openai
-Requires-Dist: pypdf2
-Requires-Dist: python-docx
-Requires-Dist: requests
-Requires-Dist: scikit-learn
-Requires-Dist: textract
-Description-Content-Type: text/markdown
-
-# RAG Server
-
-A FastMCP-based Retrieval-Augmented Generation server for dynamically ingesting public documents and querying them on-the-fly.
-
-## Installation
-
-```bash
-pip install -r requirements.txt
-```
-
-Ensure you set your OpenAI API key:
-
-```bash
-export OPENAI_API_KEY=your_key_here
-```
-
-## Running the server
-
-```bash
-python -m rag_server.server
-```
-
-## API Tools
-
-- ingest_urls(urls: List[str], session_id: Optional[str]) -> session_id
-- query_knowledge(session_id: str, question: str) -> answer
rag_server-0.0.1.dist-info/RECORD REMOVED
@@ -1,11 +0,0 @@
-rag_server/__init__.py,sha256=KNZ1bD9ZGfyZwlv91Ueeega_1lsRDLs2fYQDgNbBdtc,212
-rag_server/server.py,sha256=75IV2Ggowcx30LEtFy1stRbJGodsgvsD-CKObhbCeg4,1699
-rag_server/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-rag_server/utils/llm.py,sha256=yEmxoRQ750LGu8ufWu38RoX0umBRWw8q0GQxzFmqAy8,158
-rag_server/utils/vector/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-rag_server/utils/vector/misc.py,sha256=NbBRzU6RBc4A5Pu0cl76dutuZZfj_abwuAkKjM-LD6k,1768
-rag_server/utils/vector/store.py,sha256=b7GtzjnXuqDVpQHMZ4Otms4wIY4zB0y6aLBCu58DSNE,2929
-rag_server-0.0.1.dist-info/METADATA,sha256=i9DFzwVljGdfABtAK21WFGM9JoxE0hdAPloJlpE0za0,1104
-rag_server-0.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-rag_server-0.0.1.dist-info/entry_points.txt,sha256=sWdH-o-5Mge0fcw28bZ-lAMdlVq3PJOsXTZSzZy_ndc,76
-rag_server-0.0.1.dist-info/RECORD,,