ragpy-core 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,206 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Azure OpenAI relay utilities for the RAGpy pipeline.
4
+
5
+ This module provides a thin, deterministic wrapper around the Azure OpenAI
6
+ Python SDK. It exposes simple embedding and chat‑completion functions used
7
+ throughout the RAG workflow, while keeping configuration isolated and
8
+ monkeypatch‑friendly for unit testing.
9
+
10
+ Two independent clients are supported:
11
+ • Embedding client — for generating text embeddings.
12
+ • Chat completion client — for LLM‑based scoring, compression, and
13
+ answer generation.
14
+
15
+ The relay is intentionally minimal to ensure predictable behavior and easy
16
+ replacement during offline tests.
17
+
18
+ """
19
+ from openai import AzureOpenAI
20
+
21
+ #new Azure OpenAI SDK
22
+ embedding_Client = None
23
+ chat_Client = None
24
+ embedding_Deployment = None
25
+ completion_Deployment = None
26
+
27
def SetCompletionEndpointInfo(completion_Endpoint, completion_Deployment_Name, completion_Api_Version, api_Key):
    """
    Build the chat-completion client and remember its deployment name.

    After this call the module-level ``completion_Deployment`` holds the
    deployment name and ``chat_Client`` holds a freshly constructed
    ``AzureOpenAI`` instance. Calling it again replaces both values.

    Args:
        completion_Endpoint (str):
            Endpoint URL, forwarded to the client as ``azure_endpoint``.
        completion_Deployment_Name (str):
            Deployment name stored for later use as the ``model`` argument.
        completion_Api_Version (str):
            API version string, forwarded as ``api_version``.
        api_Key (str):
            API key, forwarded as ``api_key``.

    Returns:
        None
    """
    global chat_Client, completion_Deployment

    # The deployment name is recorded first, exactly as before, so it is
    # set even if constructing the client raises.
    completion_Deployment = completion_Deployment_Name

    client_options = {
        "azure_endpoint": completion_Endpoint,
        "api_version": completion_Api_Version,
        "api_key": api_Key,
    }
    chat_Client = AzureOpenAI(**client_options)
58
+
59
def SetEmbeddingEndpointInfo(embedding_Endpoint, embedding_Deployment_Name, embedding_Api_Version, azure_Api_Key):
    """
    Build the embedding client and remember its deployment name.

    After this call the module-level ``embedding_Deployment`` holds the
    deployment name and ``embedding_Client`` holds a freshly constructed
    ``AzureOpenAI`` instance. Calling it again replaces both values.

    Args:
        embedding_Endpoint (str):
            Endpoint URL, forwarded to the client as ``azure_endpoint``.
        embedding_Deployment_Name (str):
            Deployment name stored for later use as the ``model`` argument.
        embedding_Api_Version (str):
            API version string, forwarded as ``api_version``.
        azure_Api_Key (str):
            API key, forwarded as ``api_key``.

    Returns:
        None
    """
    global embedding_Client, embedding_Deployment

    # The deployment name is recorded first, exactly as before, so it is
    # set even if constructing the client raises.
    embedding_Deployment = embedding_Deployment_Name

    client_options = {
        "azure_endpoint": embedding_Endpoint,
        "api_version": embedding_Api_Version,
        "api_key": azure_Api_Key,
    }
    embedding_Client = AzureOpenAI(**client_options)
89
+
90
def EmbedText(text):
    """
    Generate an embedding vector for a single text string.

    Args:
        text (str):
            The input text to embed.

    Returns:
        list[float]:
            The embedding of ``text``; always 3072 values long, because any
            other length is rejected below.

    Raises:
        RuntimeError:
            If no embedding client has been configured yet.
        ValueError:
            If the returned embedding does not have 3072 dimensions.

    Notes:
        - Reads the module-level ``embedding_Client`` and
          ``embedding_Deployment`` set by ``SetEmbeddingEndpointInfo``.
    """
    # Fail with an actionable message instead of an AttributeError on None.
    if embedding_Client is None:
        raise RuntimeError(
            "No embedding client configured. Call SetEmbeddingEndpointInfo() first."
        )

    response = embedding_Client.embeddings.create(
        model=embedding_Deployment,
        input=text,
    )
    # A single input string yields a single entry in ``data``.
    vector = response.data[0].embedding

    if len(vector) != 3072:
        raise ValueError(f"Unexpected embedding dimension: {len(vector)}")

    return vector
122
+
123
def EmbedChunksInBatches(batched_chunks, batch_size=16):
    """
    Embed multiple batches of text chunks.

    Each non-empty batch is sent to the embedding model as a single request,
    and all resulting vectors are returned in one flat list.

    Args:
        batched_chunks (list[list[str]]):
            An iterable of batches, each containing text chunks.
        batch_size (int):
            Unused; kept so existing callers that pass it keep working.

    Returns:
        list[list[float]]:
            A flat list of embedding vectors, in the order the responses
            list them, batch after batch.

    Raises:
        RuntimeError:
            If there is something to embed but no embedding client has been
            configured yet.

    Notes:
        - Empty batches are skipped: they contribute no vectors, so no
          request is issued for them.
        - Unlike ``EmbedText``, vector dimensionality is not checked here.
    """
    pending = [batch for batch in batched_chunks if batch]
    if not pending:
        return []

    # Fail with an actionable message instead of an AttributeError on None.
    if embedding_Client is None:
        raise RuntimeError(
            "No embedding client configured. Call SetEmbeddingEndpointInfo() first."
        )

    all_vectors = []
    for batch in pending:
        response = embedding_Client.embeddings.create(
            model=embedding_Deployment,
            input=batch,
        )
        all_vectors.extend(item.embedding for item in response.data)

    return all_vectors
157
+
158
def ChatCompletion(prompt):
    """
    Generate a chat completion response using the configured chat client.

    Args:
        prompt (str):
            The user prompt, sent as a single ``user`` message.

    Returns:
        str:
            The text of the first choice. Returns an empty string when no
            chat client is configured, when the response has no choices, or
            when the first choice carries no text content.

    Notes:
        - The client and deployment name are looked up through
          ``GetChatClient()`` and ``GetChatModel()`` on every call.
        - No additional formatting or metadata is returned.
    """
    client = GetChatClient()
    if client is None:
        return ""

    response = client.chat.completions.create(
        model=GetChatModel(),
        messages=[{"role": "user", "content": prompt}],
    )

    if not response.choices:
        return ""

    # ``content`` can be None; normalise it so callers that immediately
    # call str methods on the result always receive a string.
    content = response.choices[0].message.content
    return content if content is not None else ""
187
+
188
def GetChatClient():
    """
    Return the chat client currently stored at module level.

    Returns:
        AzureOpenAI | None:
            The value of ``chat_Client``: the instance created by
            ``SetCompletionEndpointInfo``, or None if it was never called.
    """
    current_client = chat_Client
    return current_client
197
+
198
def GetChatModel():
    """
    Return the chat deployment name currently stored at module level.

    Returns:
        str | None:
            The value of ``completion_Deployment``: the name recorded by
            ``SetCompletionEndpointInfo``, or None if it was never called.
    """
    deployment_name = completion_Deployment
    return deployment_name
@@ -0,0 +1,64 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ LLM-based context compressor for the RAGpy pipeline.
4
+
5
+ This module provides a lightweight, deterministic interface for reducing
6
+ multiple retrieved chunks into a single compact summary. The compressor
7
+ uses AzureOpenAIRelay to generate a concise, query‑aware context block
8
+ that preserves meaning while reducing token usage. The design is
9
+ intentionally simple to support both production usage and easy
10
+ monkeypatching during unit tests.
11
+
12
+ """
13
+ from ragpy import AzureOpenAIRelay as AI
14
+
15
def CompressChunks(query, chunks):
    """
    Compress a list of retrieved chunks into a shorter, unified context block.

    The chunk texts are joined with blank lines and sent, together with the
    query, to the chat model with a request for a query-focused summary.

    Args:
        query (str):
            The user query that determines what information is relevant.
        chunks (list[dict]):
            Chunk objects; the text is read from the "chunk" key, falling
            back to "text" when "chunk" is missing or empty.

    Returns:
        str:
            The compressed context. Returns an empty string when no chat
            client is configured, when there is no chunk text to compress,
            or when the model returns no output.

    Notes:
        - With nothing to compress, no LLM call is made: summarising an
          empty text costs a request and invites invented content.
        - The compression prompt is defined inline.
    """
    if AI.GetChatClient() is None:
        return ""

    # ``or ""`` also covers chunks whose text value is present but None,
    # which would otherwise make ``join`` raise TypeError.
    texts = [c.get("chunk") or c.get("text") or "" for c in chunks]
    text = "\n\n".join(texts)

    if not text.strip():
        return ""

    prompt = f"""
You are a compression model. Your job is to compress the following text
into a concise summary that preserves meaning and focuses on information
relevant to the user query:

Query:
{query}

{text}

Return ONLY the compressed summary.
"""

    return AI.ChatCompletion(prompt) or ""
@@ -0,0 +1,77 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Retrieval Layer for RAG
4
+ Created on Tue Jun 2 2026
5
+ @author: William
6
+ """
7
+
8
+ from ragpy import VectorDatabase as VD
9
+ import logging
10
+ from typing import List, Dict, Any
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
def RetrieveTopK(embedded_query: List[float], k: int = 5) -> List[Dict[str, Any]]:
    """
    Retrieve the top-K most relevant chunks from the active collection.

    Runs a vector similarity query against ``VD.collection`` with the given
    query embedding and reshapes the first (and only) result row into a
    list of dictionaries.

    Args:
        embedded_query (list[float]):
            The embedding vector representing the user query. Must have
            exactly 3072 values.
        k (int):
            Number of chunks to request from the vector database.

    Returns:
        list[dict]:
            One dictionary per hit, in the order returned by the database:
                - "text": The stored chunk text.
                - "metadata": The metadata stored with the chunk.
                - "distance": The distance reported by the database, or
                  0.0 when the result carries no distances.
            The list is empty when the query matches nothing.

    Raises:
        RuntimeError:
            If no database collection is currently open.
        ValueError:
            If the query embedding does not have 3072 dimensions.
    """
    if VD.collection is None:
        raise RuntimeError("No database open. Call CreateDatabase() or OpenDatabase() first.")

    if len(embedded_query) != 3072:
        raise ValueError("Query embedding dimension mismatch.")

    results = VD.collection.query(
        query_embeddings=[embedded_query],
        n_results=k,
    )

    # Results are grouped per query embedding; only one was sent.
    docs = results["documents"][0]
    metas = results["metadatas"][0]

    # The key may be absent or present-but-None; both fall back to zeros.
    distance_rows = results.get("distances")
    dists = distance_rows[0] if distance_rows else [0.0] * len(docs)

    # min()/max() raise on an empty sequence, so only log a range when
    # there is at least one hit.
    if dists:
        logger.debug("Distance range: %s to %s", min(dists), max(dists))

    normalized = [
        {"text": doc, "metadata": meta, "distance": dist}
        for doc, meta, dist in zip(docs, metas, dists)
    ]

    logger.info("Retrieved %d chunks from database.", len(normalized))

    return normalized
@@ -0,0 +1,207 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ RAG Orchestrator for the RAGpy pipeline.
4
+
5
+ This module coordinates the full Retrieval‑Augmented Generation workflow,
6
+ including file ingestion, chunking, batching, embedding, retrieval,
7
+ reranking, context construction, prompt building, and final answer
8
+ generation. It acts as the high‑level interface that ties together all
9
+ RAGpy components:
10
+
11
+ • VectorDatabase — persistent storage of embedded chunks
12
+ • FileLoader — file loading and text extraction
13
+ • TextChunker — chunking of raw text
14
+ • ChunkBatcher — batching for efficient embedding
15
+ • AzureOpenAIRelay — embedding and LLM completion
16
+ • DatabaseRetriever — vector search
17
+ • Reranker — LLM‑based and deterministic reranking
18
+ • ChunkCompressor — context compression for efficient prompting
19
+
20
+ All functions in this module operate at the orchestration level and are
21
+ intended to be used directly by applications integrating RAGpy.
22
+ """
23
+ import os
24
+ import logging
25
+
26
+ from ragpy.VectorDatabase import OpenDatabase, CheckIfInDatabase, AddToDatabase
27
+ from ragpy.loaders.FileLoader import LoadFile
28
+ from ragpy.loaders.TextChunker import TextToChunk
29
+ from ragpy.batching.ChunkBatcher import BatchChunks
30
+
31
+ import ragpy.AzureOpenAIRelay as AI
32
+ import ragpy.Reranker as RR
33
+ import ragpy.DatabaseRetriever as DR
34
+ import ragpy.ChunkCompressor as CC
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
def IngestFile(path, databaseName, databaseLoc = "./vectorDB"):
    """
    Ingest a file into the vector database.

    Pipeline:
        1. Open (or create) the collection.
        2. Skip the file if it is already present in the collection.
        3. Load the file and split its text into chunks.
        4. Batch the chunks and embed every batch.
        5. Store chunks and vectors under the file's base name.

    Args:
        path (str):
            Path to the file to ingest.
        databaseName (str):
            Name of the vector database collection.
        databaseLoc (str):
            Directory where the database is stored.

    Returns:
        None

    Notes:
        - A file that yields no chunks is skipped with a warning: there is
          nothing to embed or store, and the store may reject an empty add.
    """
    filename = os.path.basename(path)
    logger.info(f"Found the file: {filename}")

    # The collection must be open before the duplicate check can run.
    OpenDatabase(databaseName, databaseLoc)

    if CheckIfInDatabase(path):
        logger.info(f"Skipping {filename}: already embedded.")
        return

    logger.info("File did not exist in DB, Embedding.")
    text = LoadFile(path)
    chunks = TextToChunk(text)

    if len(chunks) == 0:
        logger.warning(f"Skipping {filename}: no text chunks were produced.")
        return

    batches = list(BatchChunks(chunks))
    vectors = AI.EmbedChunksInBatches(batches)

    AddToDatabase(chunks, vectors, filename)

    logger.info(f"Index operation complete. {filename} was split into {len(chunks)} chunks, batched into {len(batches)} batches, and indexed.")
82
+
83
def BuildContext(retrieved_chunks):
    """
    Construct a readable context block from retrieved chunks.

    Each chunk becomes a "[CITATION i]" header line (i is the zero-based
    position in ``retrieved_chunks``) naming its source and chunk id,
    followed by the chunk text. Entries are separated by a blank line.

    Args:
        retrieved_chunks (list[dict]):
            Chunk objects containing:
                - "text" (or "chunk"): The chunk text.
                - "metadata": Optional dict with "source" and "chunk_id".

    Returns:
        str:
            The formatted context block; an empty string for no chunks.

    Notes:
        - Missing or None metadata is rendered as source "unknown" and
          chunk "N/A" instead of raising.
        - Missing text is rendered as an empty string rather than the
          literal "None".
    """
    context = []

    for i, r in enumerate(retrieved_chunks):
        metadata = r.get("metadata") or {}
        source = metadata.get("source", "unknown")
        chunk_id = metadata.get("chunk_id", "N/A")
        # Other pipeline stages store the text under "chunk"; accept both.
        text = r.get("text") or r.get("chunk") or ""

        context.append(f"[CITATION {i}] Source: {source} (chunk {chunk_id})\n{text}\n")

    return "\n".join(context)
111
+
112
def BuildPrompt(context, query, use_citations=True):
    """
    Assemble the final prompt text handed to the chat model.

    The prompt consists of, in order: the optional citation instructions,
    a "### Context:" section, a section holding the user query, and a
    trailing "### Your Answer" header for the model to continue from.

    Args:
        context (str):
            Text placed under the "### Context:" header.
        query (str):
            The user question.
        use_citations (bool):
            When True, instructions on citing chunks as [CITATION X] are
            placed at the top of the prompt; when False that slot is empty.

    Returns:
        str:
            The assembled prompt.
    """
    # Start from the no-citation case and fill the slot only on request.
    citation_instructions = ""
    if use_citations:
        citation_instructions = """
When you use information from a chunk, cite it like this: [CITATION X].
Do not invent citations. Only cite chunks that appear in the context.
"""

    return f"""
{citation_instructions}

### Context:
{context}

### Focus specifically on information relevant to the user query:
{query}

### Your Answer (friendly, readable, grounded):
"""
149
+
150
def GenerateAnswer(query, k=5, max_context_chunks=10):
    """
    Generate a grounded answer using the full RAG pipeline.

    Steps:
        1. Embed the user query.
        2. Retrieve top-K candidate chunks from the vector database.
        3. Rerank candidates using the LLM-based reranker.
        4. Keep the best-ranked chunks and compress them into one summary.
        5. Build a prompt from the compressed context (without citation
           instructions).
        6. Generate the final answer with the chat model.

    Args:
        query (str):
            The user question.
        k (int):
            Number of chunks to retrieve before reranking.
        max_context_chunks (int):
            Upper bound on how many reranked chunks are passed to the
            compressor. Defaults to 10, the previously hard-coded limit, so
            it only takes effect when more chunks than that are retrieved.

    Returns:
        str:
            The answer text returned by the chat model.

    Raises:
        ValueError:
            If the query embedding does not have 3072 dimensions.
    """
    embedded_Query = AI.EmbedText(query)

    if len(embedded_Query) != 3072:
        raise ValueError("Query embedding dimension mismatch.")

    # Step 2: retrieve candidates.
    retrieved = DR.RetrieveTopK(embedded_Query, k)

    # Step 3: rerank them.
    reranked = RR.RerankLLM(query, retrieved)

    # Step 4: keep the best chunks, under the "chunk" key that is passed
    # to the compressor.
    top_for_compression = [
        {"chunk": item["text"], "metadata": item["metadata"]}
        for item in reranked[:max_context_chunks]
    ]
    compressed_context = CC.CompressChunks(query, top_for_compression)

    # Step 5: the compressed summary no longer maps to individual chunks,
    # so citation instructions are switched off.
    prompt = BuildPrompt(compressed_context, query, use_citations=False)

    # Step 6: generate the answer.
    return AI.ChatCompletion(prompt)
ragpy-core/Reranker.py ADDED
@@ -0,0 +1,118 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ LLM-based and deterministic rerankers for RAG pipelines.
4
+
5
+ This module provides two reranking strategies:
6
+
7
+ • RerankLLM — the primary reranker used during real RAGpy execution.
8
+ It evaluates chunk relevance using an LLM scoring prompt and returns
9
+ chunks sorted by descending relevance score while preserving metadata.
10
+
11
+ • Rerank — a deterministic, test-friendly fallback used exclusively
12
+ for unit testing. It sorts chunks by text length to ensure stable,
13
+ reproducible ordering without requiring an LLM.
14
+
15
+ Both functions accept retrieved chunk objects produced by the retrieval
16
+ layer and return normalized dictionaries suitable for downstream
17
+ compression and prompt construction.
18
+ """
19
+
20
+ from ragpy import AzureOpenAIRelay as AI
21
+
22
def RerankLLM(query, retrieved_chunks):
    """
    Rerank retrieved chunks using an LLM-based relevance score.

    For every chunk a scoring prompt is sent through
    ``AI.ChatCompletion``; the reply is parsed as a float and clamped to
    [0, 1]. Replies that are missing or cannot be parsed score 0.0.

    Args:
        query (str):
            The user query used to evaluate relevance.
        retrieved_chunks (list[dict]):
            Chunk objects; the text is read from "chunk", falling back to
            "text", and "metadata" is carried over when present.

    Returns:
        list[dict]:
            Dictionaries with keys:
                - "text": The chunk text.
                - "metadata": The original metadata ({} when absent).
                - "score": The relevance score (float in [0, 1]).
            Sorted by descending score; chunks with equal scores keep
            their input order.

    Notes:
        - One LLM call is made per chunk.
    """
    scored = []

    for r in retrieved_chunks:
        chunk_text = r.get("chunk") or r.get("text")

        prompt = f"""
You are a scoring model. Rate the relevance of the chunk to the query from 0 to 1.
Return ONLY a number.

Query:
{query}

Chunk:
{chunk_text}
"""

        reply = AI.ChatCompletion(prompt)

        # Only parsing failures are absorbed: AttributeError/TypeError
        # cover a None or non-string reply, ValueError covers text that is
        # not a number. Anything else propagates.
        try:
            score = float(reply.strip())
        except (AttributeError, TypeError, ValueError):
            score = 0.0
        else:
            score = max(0.0, min(score, 1.0))

        scored.append({
            "text": chunk_text,
            "metadata": r.get("metadata", {}),
            "score": score,
        })

    scored.sort(key=lambda x: x["score"], reverse=True)
    return scored
85
+
86
def Rerank(query, retrieved_chunks):
    """
    Order chunks by text length, shortest first, without calling an LLM.

    Every chunk is first normalised: its text is taken from "text",
    falling back to "chunk" and then to an empty string, and stored under
    "text"; all other keys except "text" and "chunk" are copied unchanged.

    Args:
        query (str):
            Ignored; accepted so the signature matches ``RerankLLM``.
        retrieved_chunks (list[dict]):
            Chunk objects holding their text under "text" or "chunk".

    Returns:
        list[dict]:
            The normalised chunks in ascending order of text length.
            Chunks of equal length keep their input order.
    """
    def _normalize(raw):
        # "text" goes in first so it stays the leading key of the result.
        entry = {"text": raw.get("text") or raw.get("chunk") or ""}
        for key, value in raw.items():
            if key != "text" and key != "chunk":
                entry[key] = value
        return entry

    ordered = [_normalize(raw) for raw in retrieved_chunks]
    # list.sort is stable, so ties keep their original relative order.
    ordered.sort(key=lambda entry: len(entry["text"]))
    return ordered
@@ -0,0 +1,184 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Global state:
3
+ # - `client` holds the active ChromaDB PersistentClient
4
+ # - `collection` holds the active ChromaDB collection
5
+ """
6
+ ChromaDB persistence utilities for the RAGpy pipeline.
7
+
8
+ This module provides a thin wrapper around ChromaDB's PersistentClient,
9
+ offering simple functions for creating, opening, and populating a vector
10
+ database. The design is intentionally minimal and deterministic to support
11
+ production usage and easy monkeypatching during unit tests.
12
+
13
+ Global State:
14
+ client — the active ChromaDB PersistentClient
15
+ collection — the active ChromaDB collection
16
+ """
17
+ import chromadb
18
+ import os
19
+ import logging
20
+
21
+ client = None
22
+ collection = None
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
def CreateDatabase(dbName, path="./vectorDB"):
    """
    Create a collection in a persistent ChromaDB store.

    A ``PersistentClient`` is constructed for ``path`` and
    ``create_collection`` is called on it with ``dbName``. No existence
    check is made here; any error raised by ChromaDB propagates.

    Args:
        dbName (str):
            Name of the collection to create.
        path (str):
            Filesystem path for the persistent database.

    Returns:
        chromadb.api.models.Collection.Collection:
            The newly created collection.

    Side Effects:
        - Rebinds the module-level ``client`` and, on success, the
          module-level ``collection``.
    """
    global client, collection

    # ``client`` is rebound before the collection is created, so it is
    # updated even when ``create_collection`` raises.
    client = chromadb.PersistentClient(path)

    created = client.create_collection(dbName)
    collection = created

    return created
54
+
55
+
56
def OpenDatabase(dbName, path="./vectorDB"):
    """
    Open a ChromaDB collection, creating it if it does not exist yet.

    A single ``PersistentClient`` is constructed for ``path`` and the
    collection is obtained with ``get_or_create_collection``. This avoids
    depending on which exception class a missing collection raises and
    avoids constructing a second client on the create path.

    Args:
        dbName (str):
            Name of the collection to open.
        path (str):
            Filesystem path for the persistent database.

    Returns:
        chromadb.api.models.Collection.Collection:
            The opened or newly created collection.

    Side Effects:
        - Rebinds the module-level ``client`` and ``collection``.
    """
    global client, collection

    client = chromadb.PersistentClient(path)
    collection = client.get_or_create_collection(dbName)
    logger.info("Opened collection %r at %r.", dbName, path)

    return collection
88
+
89
def AddToDatabase(chunks, vectors, source_name):
    """
    Add embedded text chunks to the active ChromaDB collection.

    Chunk ``i`` is stored with id ``"{source_name}_{i}"``, its embedding,
    its text as the document, and the metadata
    ``{"source": source_name, "chunk_id": i}``. All chunks are written in
    a single ``collection.add`` call.

    Args:
        chunks (list[str]):
            Text chunks extracted from a document.
        vectors (list[list[float]]):
            Embedding vectors, one per chunk, in the same order.
        source_name (str):
            The name of the source document (e.g., filename).

    Returns:
        None

    Raises:
        RuntimeError:
            If no database collection is currently open.
        ValueError:
            If chunk/vector counts differ or an embedding does not have
            3072 dimensions.

    Notes:
        - Empty input is a no-op: nothing is sent to the collection.
        - Every vector is validated before anything is written.
    """
    if collection is None:
        raise RuntimeError("No database open. Call CreateDatabase() or OpenDatabase() first.")

    if len(chunks) != len(vectors):
        raise ValueError("Chunks and vectors must have the same length.")

    if len(chunks) == 0:
        return

    for vector in vectors:
        if len(vector) != 3072:
            raise ValueError(f"Invalid embedding length: {len(vector)}")

    count = len(chunks)
    ids = [f"{source_name}_{i}" for i in range(count)]
    metadatas = [{"source": source_name, "chunk_id": i} for i in range(count)]

    collection.add(
        ids=ids,
        embeddings=list(vectors),
        documents=list(chunks),
        metadatas=metadatas,
    )
149
+
150
def CheckIfInDatabase(filepath):
    """
    Determine whether a file has already been ingested into the database.

    Looks for a stored entry whose "source" metadata equals the base name
    of ``filepath``.

    Args:
        filepath (str):
            Path to the file being checked; only its base name is used.

    Returns:
        bool:
            True if a stored entry references the file, False otherwise.

    Raises:
        RuntimeError:
            If no database collection is currently open.

    Notes:
        - Only one matching row is requested (``limit=1``) and only its
          metadata is included; existence is all that matters here.
        - The returned metadata is still compared against the filename,
          and entries without a "source" key are treated as non-matching.
    """
    if collection is None:
        raise RuntimeError("No database open. Call CreateDatabase() or OpenDatabase() first.")

    filename = os.path.basename(filepath)

    results = collection.get(
        where={"source": filename},
        limit=1,
        include=["metadatas"],
    )

    metadatas = results.get("metadatas") or []
    return any(m is not None and m.get("source") == filename for m in metadatas)
ragpy-core/__init__.py ADDED
@@ -0,0 +1,43 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Wed Jun 24 17:10:10 2026
4
+
5
+ @author: klusm
6
+ """
7
+
8
+ from .AzureOpenAIRelay import (
9
+ SetEmbeddingEndpointInfo,
10
+ SetCompletionEndpointInfo
11
+ )
12
+
13
+ from .RAGOrchestrator import (
14
+ IngestFile,
15
+ GenerateAnswer,
16
+ BuildContext,
17
+ BuildPrompt
18
+ )
19
+
20
+ from .VectorDatabase import (
21
+ OpenDatabase,
22
+ AddToDatabase,
23
+ CheckIfInDatabase
24
+ )
25
+
26
+ from .utils.CitationHelper import (
27
+ ResolveCitations,
28
+ BuildReferences
29
+ )
30
+
31
+ __all__ = [
32
+ "SetEmbeddingEndpointInfo",
33
+ "SetCompletionEndpointInfo",
34
+ "IngestFile",
35
+ "GenerateAnswer",
36
+ "BuildContext",
37
+ "BuildPrompt",
38
+ "OpenDatabase",
39
+ "AddToDatabase",
40
+ "CheckIfInDatabase",
41
+ "ResolveCitations",
42
+ "BuildReferences",
43
+ ]
@@ -0,0 +1,175 @@
1
+ Metadata-Version: 2.4
2
+ Name: ragpy-core
3
+ Version: 1.0.0
4
+ Summary: A modular Retrieval-Augmented Generation (RAG) pipeline for Python.
5
+ Author: William Klusman
6
+ Author-email: William Klusman <klusmannwilliam@gmail.com>
7
+ License: MIT License
8
+
9
+ Copyright (c) 2026 William Klusman
10
+
11
+ Permission is hereby granted, free of charge, to any person obtaining a copy
12
+ of this software and associated documentation files (the “Software”), to deal
13
+ in the Software without restriction, including without limitation the rights
14
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
+ copies of the Software, and to permit persons to whom the Software is
16
+ furnished to do so, subject to the following conditions:
17
+
18
+ The above copyright notice and this permission notice shall be included in
19
+ all copies or substantial portions of the Software.
20
+
21
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27
+ DEALINGS IN THE SOFTWARE.
28
+ Keywords: RAG,retrieval-augmented-generation,LLM,vector-database,azure-openai,machine-learning,nlp
29
+ Classifier: Development Status :: 3 - Alpha
30
+ Classifier: Intended Audience :: Developers
31
+ Classifier: Intended Audience :: Science/Research
32
+ Classifier: License :: OSI Approved :: MIT License
33
+ Classifier: Programming Language :: Python :: 3
34
+ Classifier: Programming Language :: Python :: 3.9
35
+ Classifier: Programming Language :: Python :: 3.10
36
+ Classifier: Programming Language :: Python :: 3.11
37
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
38
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
39
+ Requires-Python: >=3.9
40
+ Description-Content-Type: text/markdown
41
+ License-File: LICENSE
42
+ Requires-Dist: chromadb>=0.4.0
43
+ Requires-Dist: numpy>=1.20
44
+ Requires-Dist: tiktoken>=0.5.0
45
+ Requires-Dist: requests>=2.0
46
+ Requires-Dist: openai>=1.0.0
47
+ Requires-Dist: pypdf>=4.0.0
48
+ Dynamic: author
49
+ Dynamic: license-file
50
+ Dynamic: requires-python
51
+
52
+ RAGpy
53
+ RAGpy is a lightweight, modular Retrieval-Augmented Generation (RAG) pipeline for Python. It provides a clear and testable architecture for document ingestion, chunking, embedding, retrieval, reranking, context compression, and grounded answer generation using Azure OpenAI and ChromaDB.
54
+
55
+ RAGpy is designed for developers who want a transparent, hackable RAG system without the complexity of large frameworks.
56
+
57
+ Features
58
+ Modular ingestion pipeline for text and PDF documents
59
+
60
+ Chunking and batching utilities for efficient embedding
61
+
62
+ Azure OpenAI embeddings and chat completions
63
+
64
+ ChromaDB vector database integration
65
+
66
+ LLM-based reranking for improved retrieval quality
67
+
68
+ Context compression to reduce token usage
69
+
70
+ Fully monkeypatch-friendly design for offline testing
71
+
72
+ Clean architecture suitable for extension and customization
73
+
74
+ Installation
75
+ Install from PyPI:
76
+
77
+ Code
78
+ pip install ragpy-core
79
+ For development:
80
+
81
+ Code
82
+ git clone https://github.com/yourusername/ragpy
83
+ cd ragpy
84
+ pip install -e .
85
+ Quickstart Example
86
+ python
87
+ from ragpy.RAGOrchestrator import IngestFile, GenerateAnswer
88
+ from ragpy.VectorDatabase import OpenDatabase
89
+
90
+ OpenDatabase("AeroDB", "./vectorDB")
91
+ IngestFile("engine_vibration.pdf", "AeroDB")
92
+
93
+ answer = GenerateAnswer("What causes engine vibration?", "AeroDB")
94
+ print(answer)
95
+ How RAGpy Works
96
+ 1. Ingestion
97
+ Load text or PDF using FileLoader
98
+
99
+ Chunk text using TextChunker
100
+
101
+ Batch chunks using ChunkBatcher
102
+
103
+ Generate embeddings with Azure OpenAI
104
+
105
+ Store vectors and metadata in ChromaDB
106
+
107
+ 2. Retrieval
108
+ Embed the user query
109
+
110
+ Retrieve top-K candidates from the vector database
111
+
112
+ 3. Reranking
113
+ Use an LLM-based reranker to reorder retrieved chunks by relevance
114
+
115
+ 4. Compression
116
+ Summarize top chunks into a compact context block
117
+
118
+ 5. Answer Generation
119
+ Build a prompt using compressed context
120
+
121
+ Generate a grounded answer using Azure OpenAI
122
+
123
+ Project Structure
124
+ Code
125
+ ragpy/
126
+ AzureOpenAIRelay.py
127
+ RAGOrchestrator.py
128
+ VectorDatabase.py
129
+ Reranker.py
130
+ ChunkCompressor.py
131
+ loaders/
132
+ FileLoader.py
133
+ TextChunker.py
134
+ batching/
135
+ ChunkBatcher.py
136
+ tests/
137
+ docs/
138
+ Requirements
139
+ Python 3.9+
140
+
141
+ ChromaDB
142
+
143
+ numpy
144
+
145
+ tiktoken
146
+
147
+ pypdf
148
+
149
+ openai (Azure OpenAI SDK)
150
+
151
+ Testing
152
+ RAGpy includes a full pytest suite. All Azure calls are monkeypatch-friendly, allowing offline testing with mock LLMs.
153
+
154
+ Run tests:
155
+
156
+ Code
157
+ pytest -q
158
+ Contributing
159
+ Contributions are welcome.
160
+ Please open an issue or submit a pull request on GitHub.
161
+
162
+ Planned enhancements include:
163
+
164
+ Local embedding support (sentence-transformers)
165
+
166
+ Hybrid retrieval (vector + keyword)
167
+
168
+ Multimodal RAG (image + text)
169
+
170
+ Evaluation tools for relevance and faithfulness
171
+
172
+ Agentic RAG extensions
173
+
174
+ License
175
+ RAGpy is released under the MIT License.
@@ -0,0 +1,12 @@
1
+ ragpy-core/AzureOpenAIRelay.py,sha256=IqD_zkMnDlmMdyKfTEtJsYS3s7yXR71WrBewkYkHfVI,6147
2
+ ragpy-core/ChunkCompressor.py,sha256=iVAgW7Ku5FQx8ntjEregP8O2J0Qc9OFNxOv-iO5lX-0,2277
3
+ ragpy-core/DatabaseRetriever.py,sha256=jvbN9esU5RzQW98uA6qYE7SQvaGx87XGQdslZ9yIxu8,2793
4
+ ragpy-core/RAGOrchestrator.py,sha256=lR1GldVnF0xVGCfUijETYh4kdWHS8Zd_-FitxuIbV7k,6857
5
+ ragpy-core/Reranker.py,sha256=_AMvwtQp1xulBq_7iInXRuiian39tfkGxzNOLyCdCIg,4167
6
+ ragpy-core/VectorDatabase.py,sha256=xado-9gobVEs14tcJfFe7NmPVCT6Y3P168RVcit8w-0,5576
7
+ ragpy-core/__init__.py,sha256=Vih4zqBsj9rJKXSrdhdCAU9ryEBr-ZHCmbdjvrX_42k,769
8
+ ragpy_core-1.0.0.dist-info/licenses/LICENSE,sha256=N0pk1ZLa00VYhRM9xzwThPMdfL_8acMtFLhMZj_fSeA,1119
9
+ ragpy_core-1.0.0.dist-info/METADATA,sha256=HDopMelIJgs-WSSb4KHNjpSlExBvnBvrBnl4g7FSzyc,5313
10
+ ragpy_core-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
11
+ ragpy_core-1.0.0.dist-info/top_level.txt,sha256=2nLuavOLaI-fiobSkBx_CDqjD1kWlO2_2TCgOKT_rhY,11
12
+ ragpy_core-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 William Klusman
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the “Software”), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ DEALINGS IN THE SOFTWARE.
@@ -0,0 +1 @@
1
+ ragpy-core