ragpy-core 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragpy-core/AzureOpenAIRelay.py +206 -0
- ragpy-core/ChunkCompressor.py +64 -0
- ragpy-core/DatabaseRetriever.py +77 -0
- ragpy-core/RAGOrchestrator.py +207 -0
- ragpy-core/Reranker.py +118 -0
- ragpy-core/VectorDatabase.py +184 -0
- ragpy-core/__init__.py +43 -0
- ragpy_core-1.0.0.dist-info/METADATA +175 -0
- ragpy_core-1.0.0.dist-info/RECORD +12 -0
- ragpy_core-1.0.0.dist-info/WHEEL +5 -0
- ragpy_core-1.0.0.dist-info/licenses/LICENSE +21 -0
- ragpy_core-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
Azure OpenAI relay utilities for the RAGpy pipeline.
|
|
4
|
+
|
|
5
|
+
This module provides a thin, deterministic wrapper around the Azure OpenAI
|
|
6
|
+
Python SDK. It exposes simple embedding and chat‑completion functions used
|
|
7
|
+
throughout the RAG workflow, while keeping configuration isolated and
|
|
8
|
+
monkeypatch‑friendly for unit testing.
|
|
9
|
+
|
|
10
|
+
Two independent clients are supported:
|
|
11
|
+
• Embedding client — for generating text embeddings.
|
|
12
|
+
• Chat completion client — for LLM‑based scoring, compression, and
|
|
13
|
+
answer generation.
|
|
14
|
+
|
|
15
|
+
The relay is intentionally minimal to ensure predictable behavior and easy
|
|
16
|
+
replacement during offline tests.
|
|
17
|
+
|
|
18
|
+
"""
|
|
19
|
+
from openai import AzureOpenAI
|
|
20
|
+
|
|
21
|
+
#new Azure OpenAI SDK
|
|
22
|
+
embedding_Client = None
|
|
23
|
+
chat_Client = None
|
|
24
|
+
embedding_Deployment = None
|
|
25
|
+
completion_Deployment = None
|
|
26
|
+
|
|
27
|
+
def SetCompletionEndpointInfo(completion_Endpoint, completion_Deployment_Name, completion_Api_Version, api_Key):
    """
    Create the module-level chat completion client.

    Records the deployment name in the module global
    ``completion_Deployment`` and then builds an ``AzureOpenAI`` client
    from the supplied connection details, storing it in the module global
    ``chat_Client``.

    Args:
        completion_Endpoint (str):
            Azure endpoint URL used for chat completions.
        completion_Deployment_Name (str):
            Name of the deployed chat model.
        completion_Api_Version (str):
            API version passed to the client.
        api_Key (str):
            Azure OpenAI API key.

    Returns:
        None
    """
    global chat_Client, completion_Deployment

    # The deployment name is stored first, before the client is constructed.
    completion_Deployment = completion_Deployment_Name

    client_settings = {
        "azure_endpoint": completion_Endpoint,
        "api_version": completion_Api_Version,
        "api_key": api_Key,
    }
    chat_Client = AzureOpenAI(**client_settings)
|
|
58
|
+
|
|
59
|
+
def SetEmbeddingEndpointInfo(embedding_Endpoint, embedding_Deployment_Name, embedding_Api_Version, azure_Api_Key):
    """
    Create the module-level embedding client.

    Records the deployment name in the module global
    ``embedding_Deployment`` and then builds an ``AzureOpenAI`` client from
    the supplied connection details, storing it in the module global
    ``embedding_Client``.

    Args:
        embedding_Endpoint (str):
            Azure endpoint URL used for embeddings.
        embedding_Deployment_Name (str):
            Name of the deployed embedding model.
        embedding_Api_Version (str):
            API version passed to the client.
        azure_Api_Key (str):
            Azure OpenAI API key.

    Returns:
        None
    """
    global embedding_Client, embedding_Deployment

    # The deployment name is stored first, before the client is constructed.
    embedding_Deployment = embedding_Deployment_Name

    client_settings = {
        "azure_endpoint": embedding_Endpoint,
        "api_version": embedding_Api_Version,
        "api_key": azure_Api_Key,
    }
    embedding_Client = AzureOpenAI(**client_settings)
|
|
89
|
+
|
|
90
|
+
def EmbedText(text):
    """
    Generate an embedding vector for a single text string.

    Args:
        text (str):
            The input text to embed.

    Returns:
        list[float]:
            The embedding of the first item in the response returned by the
            configured embedding deployment. It is checked to contain
            exactly 3072 values.

    Raises:
        RuntimeError:
            If the embedding client has not been configured with
            SetEmbeddingEndpointInfo().
        ValueError:
            If the returned embedding does not have 3072 dimensions.

    Notes:
        - This function is intentionally simple to support monkeypatching
          during unit tests.
    """
    # Fail with an actionable message instead of an AttributeError on None.
    if embedding_Client is None:
        raise RuntimeError(
            "Embedding client not configured. Call SetEmbeddingEndpointInfo() first."
        )

    response = embedding_Client.embeddings.create(
        model=embedding_Deployment,
        input=text,
    )
    vector = response.data[0].embedding

    # The rest of the pipeline validates against the same fixed dimension.
    if len(vector) != 3072:
        raise ValueError(f"Unexpected embedding dimension: {len(vector)}")

    return vector
|
|
122
|
+
|
|
123
|
+
def EmbedChunksInBatches(batched_chunks, batch_size=16):
    """
    Embed multiple batches of text chunks.

    Each non-empty batch is sent to the embedding model as a single request,
    and all resulting vectors are returned in a flat list (one embedding per
    item in the response data).

    Args:
        batched_chunks (list[list[str]]):
            A list of batches, each containing text chunks.
        batch_size (int):
            Optional batch size hint (unused but kept for compatibility).

    Returns:
        list[list[float]]:
            A flat list of embedding vectors. Batches are processed in input
            order and vectors are appended in the order the response lists
            them.

    Notes:
        - Empty batches are skipped so that no request with an empty input
          list is issued.
        - The embedding client must be configured before any non-empty
          batch is processed.
        - This function is intentionally simple to support monkeypatching.
    """
    all_vectors = []

    for batch in batched_chunks:
        # Nothing to embed: avoid sending an empty input list.
        if not batch:
            continue

        response = embedding_Client.embeddings.create(
            model=embedding_Deployment,
            input=batch,
        )
        all_vectors.extend(item.embedding for item in response.data)

    return all_vectors
|
|
157
|
+
|
|
158
|
+
def ChatCompletion(prompt):
    """
    Generate a chat completion response using the configured chat client.

    The prompt is sent as a single ``user`` message to the deployment
    returned by GetChatModel().

    Args:
        prompt (str):
            The user prompt to send to the model.

    Returns:
        str:
            The text of the first choice. An empty string is returned when
            no chat client is configured, when the response has no choices,
            or when the first choice carries no text content.

    Notes:
        - This function is intentionally simple to support monkeypatching
          during unit tests.
        - No additional formatting or metadata is returned.
    """
    client = GetChatClient()
    if client is None:
        # In tests, FakeChat will override this entirely.
        return ""

    response = client.chat.completions.create(
        model=GetChatModel(),
        messages=[{"role": "user", "content": prompt}],
    )

    if not response.choices:
        return ""

    # Callers treat the result as a string (e.g. they call .strip()), so a
    # missing content value is normalized to an empty string.
    return response.choices[0].message.content or ""
|
|
187
|
+
|
|
188
|
+
def GetChatClient():
    """
    Return the module-level chat client.

    Returns:
        AzureOpenAI | None:
            The value of the module global ``chat_Client``; it is None until
            SetCompletionEndpointInfo() has been called.
    """
    configured_client = chat_Client
    return configured_client
|
|
197
|
+
|
|
198
|
+
def GetChatModel():
    """
    Return the chat deployment name held at module level.

    Returns:
        str | None:
            The value of the module global ``completion_Deployment``; it is
            None until SetCompletionEndpointInfo() has been called.
    """
    deployment_name = completion_Deployment
    return deployment_name
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
LLM-based context compressor for the RAGpy pipeline.
|
|
4
|
+
|
|
5
|
+
This module provides a lightweight, deterministic interface for reducing
|
|
6
|
+
multiple retrieved chunks into a single compact summary. The compressor
|
|
7
|
+
uses AzureOpenAIRelay to generate a concise, query‑aware context block
|
|
8
|
+
that preserves meaning while reducing token usage. The design is
|
|
9
|
+
intentionally simple to support both production usage and easy
|
|
10
|
+
monkeypatching during unit tests.
|
|
11
|
+
|
|
12
|
+
"""
|
|
13
|
+
from ragpy import AzureOpenAIRelay as AI
|
|
14
|
+
|
|
15
|
+
def CompressChunks(query, chunks):
    """
    Compress a list of retrieved chunks into a shorter, unified context block.

    The chunk texts are joined with blank lines and sent, together with the
    user query, to the LLM through AzureOpenAIRelay with a prompt asking for
    a concise, query-focused summary.

    Args:
        query (str):
            The user query that determines what information is relevant.
        chunks (list[dict]):
            A list of chunk objects. The text is read from the "chunk" key,
            falling back to "text"; missing or None values count as empty.

    Returns:
        str:
            The compressed context string. Returns an empty string when
            there is no text to compress, when no chat client is configured,
            or when the model returns no output.

    Notes:
        - The function is intentionally simple to allow monkeypatching
          during unit tests.
        - The compression prompt is defined inline for clarity and
          isolation.
    """
    # `or ""` guards against a key that is present but holds None, which
    # would otherwise make str.join raise TypeError.
    pieces = [c.get("chunk") or c.get("text") or "" for c in chunks]
    text = "\n\n".join(pieces)

    # Nothing to summarize: skip the LLM call rather than asking the model
    # to compress an empty passage.
    if not text.strip():
        return ""

    # If monkeypatched, FakeCompress will run instead of this function.
    if AI.GetChatClient() is None:
        return ""

    prompt = f"""
You are a compression model. Your job is to compress the following text
into a concise summary that preserves meaning and focuses on information
relevant to the user query:

Query:
{query}

{text}

Return ONLY the compressed summary.
"""

    return AI.ChatCompletion(prompt)
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
Retrieval Layer for RAG
|
|
4
|
+
Created on Tue Jun 2 2026
|
|
5
|
+
@author: William
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from ragpy import VectorDatabase as VD
|
|
9
|
+
import logging
|
|
10
|
+
from typing import List, Dict, Any
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
def RetrieveTopK(embedded_query: List[float], k: int = 5) -> List[Dict[str, Any]]:
    """
    Retrieve the top-K closest chunks from the active database collection.

    Runs a vector query against ``VD.collection`` with the given query
    embedding and reshapes the first (and only) result row into a list of
    dictionaries used by the downstream components.

    Args:
        embedded_query (list[float]):
            The embedding vector representing the user query. Must contain
            exactly 3072 values.
        k (int):
            Number of chunks to request from the vector database.

    Returns:
        list[dict]:
            One dictionary per retrieved chunk, each containing:
                - "text": The stored chunk text.
                - "metadata": The metadata stored with the chunk.
                - "distance": The distance reported by the database, or
                  0.0 when the result carries no distances.
            The list is empty when the query matches nothing.

    Raises:
        RuntimeError:
            If no database collection is currently open.
        ValueError:
            If the query embedding does not have 3072 dimensions.
    """
    if VD.collection is None:
        raise RuntimeError("No database open. Call CreateDatabase() or OpenDatabase() first.")

    if len(embedded_query) != 3072:
        raise ValueError("Query embedding dimension mismatch.")

    results = VD.collection.query(
        query_embeddings=[embedded_query],
        n_results=k,
    )

    docs = results["documents"][0]
    metas = results["metadatas"][0]

    # "distances" may be absent or present-but-None; both fall back to 0.0.
    distance_rows = results.get("distances")
    dists = distance_rows[0] if distance_rows else [0.0] * len(docs)

    # min()/max() raise on an empty sequence, so only log when there are hits.
    if dists:
        logger.debug("Distance range: %s to %s", min(dists), max(dists))

    normalized = [
        {"text": doc, "metadata": meta, "distance": dist}
        for doc, meta, dist in zip(docs, metas, dists)
    ]

    logger.info(f"Retrieved {len(normalized)} chunks from database.")

    return normalized
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
RAG Orchestrator for the RAGpy pipeline.
|
|
4
|
+
|
|
5
|
+
This module coordinates the full Retrieval‑Augmented Generation workflow,
|
|
6
|
+
including file ingestion, chunking, batching, embedding, retrieval,
|
|
7
|
+
reranking, context construction, prompt building, and final answer
|
|
8
|
+
generation. It acts as the high‑level interface that ties together all
|
|
9
|
+
RAGpy components:
|
|
10
|
+
|
|
11
|
+
• VectorDatabase — persistent storage of embedded chunks
|
|
12
|
+
• FileLoader — file loading and text extraction
|
|
13
|
+
• TextChunker — chunking of raw text
|
|
14
|
+
• ChunkBatcher — batching for efficient embedding
|
|
15
|
+
• AzureOpenAIRelay — embedding and LLM completion
|
|
16
|
+
• DatabaseRetriever — vector search
|
|
17
|
+
• Reranker — LLM‑based and deterministic reranking
|
|
18
|
+
• ChunkCompressor — context compression for efficient prompting
|
|
19
|
+
|
|
20
|
+
All functions in this module operate at the orchestration level and are
|
|
21
|
+
intended to be used directly by applications integrating RAGpy.
|
|
22
|
+
"""
|
|
23
|
+
import os
|
|
24
|
+
import logging
|
|
25
|
+
|
|
26
|
+
from ragpy.VectorDatabase import OpenDatabase, CheckIfInDatabase, AddToDatabase
|
|
27
|
+
from ragpy.loaders.FileLoader import LoadFile
|
|
28
|
+
from ragpy.loaders.TextChunker import TextToChunk
|
|
29
|
+
from ragpy.batching.ChunkBatcher import BatchChunks
|
|
30
|
+
|
|
31
|
+
import ragpy.AzureOpenAIRelay as AI
|
|
32
|
+
import ragpy.Reranker as RR
|
|
33
|
+
import ragpy.DatabaseRetriever as DR
|
|
34
|
+
import ragpy.ChunkCompressor as CC
|
|
35
|
+
|
|
36
|
+
logger = logging.getLogger(__name__)
|
|
37
|
+
|
|
38
|
+
def IngestFile(path, databaseName, databaseLoc = "./vectorDB"):
    """
    Ingest a file into the vector database.

    Performs the full ingestion pipeline:
        1. Open (or create) the database collection.
        2. Skip the file if it is already present in the collection.
        3. Load the file and chunk the extracted text.
        4. Batch the chunks and embed all batches using AzureOpenAIRelay.
        5. Store the embedded chunks in the vector database.

    Args:
        path (str):
            Path to the file to ingest.
        databaseName (str):
            Name of the vector database collection.
        databaseLoc (str):
            Directory where the database is stored.

    Returns:
        None
    """
    filename = os.path.basename(path)
    logger.info(f"Found the file: {filename}")

    # Open the database so the lookups below have an active collection.
    OpenDatabase(databaseName, databaseLoc)

    # Guard clause: nothing to do when the file was embedded earlier.
    if CheckIfInDatabase(path):
        logger.info(f"Skipping {filename}: already embedded.")
        return

    logger.info("File did not exist in DB, Embedding.")
    text = LoadFile(path)
    chunks = TextToChunk(text)

    # A file that produces no chunks has nothing to embed or store; stop
    # here instead of issuing an empty add to the database.
    if not chunks:
        logger.warning(f"Skipping {filename}: no text chunks were produced.")
        return

    batches = list(BatchChunks(chunks))
    vectors = AI.EmbedChunksInBatches(batches)

    AddToDatabase(chunks, vectors, filename)

    logger.info(f"Index operation complete. {filename} was split into {len(chunks)} chunks, batched into {len(batches)} batches, and indexed.")
|
|
82
|
+
|
|
83
|
+
def BuildContext(retrieved_chunks):
    """
    Construct a readable context block from retrieved chunks.

    Each chunk is rendered as a header line with a zero-based citation
    marker, the source name and the chunk index, followed by the chunk text.
    The rendered entries are joined with newlines.

    Args:
        retrieved_chunks (list[dict]):
            Retrieved chunk objects containing:
                - "text" (or "chunk"): The chunk text. Missing or None
                  text is rendered as an empty string.
                - "metadata": Optional metadata with "source" and
                  "chunk_id". Missing or None metadata falls back to
                  "unknown" and "N/A".

    Returns:
        str:
            A formatted context block with citation markers, or an empty
            string when no chunks are given.
    """
    context = []

    for i, r in enumerate(retrieved_chunks):
        # Tolerate chunks whose metadata is absent or explicitly None.
        metadata = r.get("metadata") or {}
        source = metadata.get("source", "unknown")
        chunk_id = metadata.get("chunk_id", "N/A")

        # Avoid rendering the literal string "None" for missing text.
        text = r.get("text") or r.get("chunk") or ""

        context.append(f"[CITATION {i}] Source: {source} (chunk {chunk_id})\n{text}\n")

    return "\n".join(context)
|
|
111
|
+
|
|
112
|
+
def BuildPrompt(context, query, use_citations=True):
    """
    Assemble the RAG prompt from a context block and the user query.

    The prompt consists of the optional citation instructions, a
    "### Context:" section holding ``context``, a section holding ``query``,
    and a trailing answer heading.

    Args:
        context (str):
            The context block to include in the prompt.
        query (str):
            The user question.
        use_citations (bool):
            When true, instructions on how to cite chunks are placed at the
            top of the prompt; otherwise that slot is left empty.

    Returns:
        str:
            The constructed prompt ready for LLM completion.
    """
    # Default to no instructions; overwrite only when citations are wanted.
    citation_instructions = ""
    if use_citations:
        citation_instructions = """
When you use information from a chunk, cite it like this: [CITATION X].
Do not invent citations. Only cite chunks that appear in the context.
"""

    return f"""
{citation_instructions}

### Context:
{context}

### Focus specifically on information relevant to the user query:
{query}

### Your Answer (friendly, readable, grounded):
"""
|
|
149
|
+
|
|
150
|
+
def GenerateAnswer(query, k=5):
    """
    Answer a question by running the full RAG pipeline.

    Steps:
        1. Embed the user query.
        2. Retrieve ``k`` candidate chunks from the vector database.
        3. Rerank the candidates with the LLM-based reranker.
        4. Compress at most the ten best-ranked chunks into one summary.
        5. Build a prompt from the compressed context, without citation
           instructions.
        6. Generate the final answer using AzureOpenAIRelay.

    Args:
        query (str):
            The user question.
        k (int):
            Number of chunks to retrieve before reranking.

    Returns:
        str:
            The answer text returned by the chat completion call.

    Raises:
        ValueError:
            If the query embedding does not have 3072 dimensions.
    """
    query_vector = AI.EmbedText(query)
    if len(query_vector) != 3072:
        raise ValueError("Query embedding dimension mismatch.")

    # Retrieve candidates, then let the LLM reranker order them.
    candidates = DR.RetrieveTopK(query_vector, k)
    ranked = RR.RerankLLM(query, candidates)

    # Only the ten highest-ranked chunks are handed to the compressor, in
    # the {"chunk", "metadata"} shape it reads.
    top_for_compression = []
    for entry in ranked[:10]:
        top_for_compression.append(
            {"chunk": entry["text"], "metadata": entry["metadata"]}
        )

    summary = CC.CompressChunks(query, top_for_compression)

    # Citations are disabled here: the compressed summary no longer maps
    # back to individual chunks, so there is nothing to resolve afterwards.
    final_prompt = BuildPrompt(summary, query, use_citations=False)

    return AI.ChatCompletion(final_prompt)
|
ragpy-core/Reranker.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
LLM-based and deterministic rerankers for RAG pipelines.
|
|
4
|
+
|
|
5
|
+
This module provides two reranking strategies:
|
|
6
|
+
|
|
7
|
+
• RerankLLM — the primary reranker used during real RAGpy execution.
|
|
8
|
+
It evaluates chunk relevance using an LLM scoring prompt and returns
|
|
9
|
+
chunks sorted by descending relevance score while preserving metadata.
|
|
10
|
+
|
|
11
|
+
• Rerank — a deterministic, test-friendly fallback used exclusively
|
|
12
|
+
for unit testing. It sorts chunks by text length to ensure stable,
|
|
13
|
+
reproducible ordering without requiring an LLM.
|
|
14
|
+
|
|
15
|
+
Both functions accept retrieved chunk objects produced by the retrieval
|
|
16
|
+
layer and return normalized dictionaries suitable for downstream
|
|
17
|
+
compression and prompt construction.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from ragpy import AzureOpenAIRelay as AI
|
|
21
|
+
|
|
22
|
+
def RerankLLM(query, retrieved_chunks):
    """
    Rerank retrieved chunks using an LLM-based scoring prompt.

    For every chunk a scoring prompt is sent through AzureOpenAIRelay. The
    reply is parsed as a float and clamped to the range [0, 1]; a reply
    that is missing or cannot be parsed as a number scores 0.0.

    Args:
        query (str):
            The user query used to evaluate relevance.
        retrieved_chunks (list[dict]):
            Chunk objects whose text is read from "chunk", falling back to
            "text", and whose "metadata" (if any) is carried over.

    Returns:
        list[dict]:
            A list of dictionaries with keys:
                - "text": The chunk text.
                - "metadata": The chunk's metadata, or {} when absent.
                - "score": The relevance score (float).
            Sorted in descending score order.

    Notes:
        - It is intentionally monkeypatch-friendly for offline testing.
        - One chat completion call is made per chunk.
    """
    scored = []

    for r in retrieved_chunks:
        chunk_text = r.get("chunk") or r.get("text")

        prompt = f"""
You are a scoring model. Rate the relevance of the chunk to the query from 0 to 1.
Return ONLY a number.

Query:
{query}

Chunk:
{chunk_text}
"""

        # A None reply is treated like an empty one instead of crashing on
        # .strip() before the parsing fallback can apply.
        score_text = (AI.ChatCompletion(prompt) or "").strip()

        try:
            score = max(0.0, min(float(score_text), 1.0))
        except (TypeError, ValueError):
            # Only parsing failures are swallowed; anything else propagates.
            score = 0.0

        scored.append({
            "text": chunk_text,
            "metadata": r.get("metadata", {}),
            "score": score,
        })

    scored.sort(key=lambda x: x["score"], reverse=True)
    return scored
|
|
85
|
+
|
|
86
|
+
def Rerank(query, retrieved_chunks):
    """
    Order chunks by text length without calling an LLM.

    Every chunk is rewritten so that its text lives under the "text" key
    (taken from "text", else "chunk", else the empty string). All other
    keys are copied unchanged and any "chunk" key is dropped. The result is
    sorted by ascending text length; chunks of equal length keep their
    input order.

    Args:
        query (str):
            The user query (unused in this heuristic but included for API consistency).
        retrieved_chunks (list[dict]):
            A list of retrieved chunk objects.

    Returns:
        list[dict]:
            New chunk dictionaries, shortest text first.
    """
    def _normalize(chunk):
        # "text" goes first, then every remaining key in original order.
        entry = {"text": chunk.get("text") or chunk.get("chunk") or ""}
        for key, value in chunk.items():
            if key not in ("text", "chunk"):
                entry[key] = value
        return entry

    ordered = [_normalize(r) for r in retrieved_chunks]
    # list.sort is stable, matching the ordering produced by sorted().
    ordered.sort(key=lambda c: len(c["text"]))
    return ordered
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Global state:
|
|
3
|
+
# - `client` holds the active ChromaDB PersistentClient
|
|
4
|
+
# - `collection` holds the active ChromaDB collection
|
|
5
|
+
"""
|
|
6
|
+
ChromaDB persistence utilities for the RAGpy pipeline.
|
|
7
|
+
|
|
8
|
+
This module provides a thin wrapper around ChromaDB's PersistentClient,
|
|
9
|
+
offering simple functions for creating, opening, and populating a vector
|
|
10
|
+
database. The design is intentionally minimal and deterministic to support
|
|
11
|
+
production usage and easy monkeypatching during unit tests.
|
|
12
|
+
|
|
13
|
+
Global State:
|
|
14
|
+
client — the active ChromaDB PersistentClient
|
|
15
|
+
collection — the active ChromaDB collection
|
|
16
|
+
"""
|
|
17
|
+
import chromadb
|
|
18
|
+
import os
|
|
19
|
+
import logging
|
|
20
|
+
|
|
21
|
+
client = None
|
|
22
|
+
collection = None
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def CreateDatabase(dbName, path="./vectorDB"):
    """
    Create a new collection in a persistent ChromaDB store.

    A ``chromadb.PersistentClient`` is opened at ``path`` and
    ``create_collection`` is called with ``dbName``. No existence check is
    made here, so any error raised by ChromaDB propagates to the caller.

    Args:
        dbName (str):
            Name of the collection to create.
        path (str):
            Filesystem path for the persistent database.

    Returns:
        chromadb.api.models.Collection.Collection:
            The newly created collection.

    Side Effects:
        - Sets the global `client` and `collection` variables.
    """
    global client, collection

    # The client global is assigned before the collection is created.
    persistent_client = chromadb.PersistentClient(path)
    client = persistent_client

    created = persistent_client.create_collection(dbName)
    collection = created

    return created
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def OpenDatabase(dbName, path="./vectorDB"):
    """
    Open a ChromaDB collection, creating it if it does not exist yet.

    A ``chromadb.PersistentClient`` is opened at ``path`` and the collection
    is obtained with ``get_or_create_collection``, so a single client is
    used and no particular "not found" exception class has to be caught.

    Args:
        dbName (str):
            Name of the collection to open.
        path (str):
            Filesystem path for the persistent database.

    Returns:
        chromadb.api.models.Collection.Collection:
            The opened or newly created collection.

    Side Effects:
        - Sets the global `client` and `collection` variables.
    """
    global client, collection

    client = chromadb.PersistentClient(path)

    # One call covers both the "exists" and the "missing" case.
    collection = client.get_or_create_collection(dbName)
    logger.debug("Opened collection %s at %s", dbName, path)

    return collection
|
|
88
|
+
|
|
89
|
+
def AddToDatabase(chunks, vectors, source_name):
    """
    Add embedded text chunks to the active ChromaDB collection.

    Stores each chunk with its embedding vector and a small metadata record
    in the currently opened collection. The ID of a chunk is
    ``"{source_name}_{index}"``.

    Args:
        chunks (list[str]):
            Text chunks extracted from a document.
        vectors (list[list[float]]):
            Embedding vectors corresponding to each chunk.
        source_name (str):
            The name of the source document (e.g., filename).

    Returns:
        None

    Raises:
        RuntimeError:
            If no database collection is currently open.
        ValueError:
            If chunk/vector counts differ or an embedding does not have
            3072 dimensions.

    Notes:
        - Calling this with no chunks is a no-op; nothing is sent to the
          collection.
        - All vectors are validated before anything is added.
        - Metadata stores only `source` and `chunk_id`.
    """
    if collection is None:
        raise RuntimeError("No database open. Call CreateDatabase() or OpenDatabase() first.")

    if len(chunks) != len(vectors):
        raise ValueError("Chunks and vectors must have the same length.")

    # Nothing to store: avoid calling add() with empty lists.
    if len(chunks) == 0:
        return

    for vector in vectors:
        if len(vector) != 3072:
            raise ValueError(f"Invalid embedding length: {len(vector)}")

    indices = range(len(chunks))

    collection.add(
        ids=[f"{source_name}_{i}" for i in indices],
        embeddings=list(vectors),
        documents=list(chunks),
        metadatas=[{"source": source_name, "chunk_id": i} for i in indices],
    )
|
|
149
|
+
|
|
150
|
+
def CheckIfInDatabase(filepath):
    """
    Report whether any stored chunk originates from the given file.

    The lookup asks the open collection for entries whose ``source``
    metadata equals the file's basename, then confirms that at least one
    returned metadata entry carries that same ``source`` value.

    Args:
        filepath (str):
            Path to the file being checked. Only its basename is used
            for matching; the directory portion is ignored.

    Returns:
        bool:
            True if at least one returned metadata entry has a ``source``
            equal to the file's basename, False otherwise.

    Raises:
        RuntimeError:
            If no database collection is currently open.

    Notes:
        - Two files with the same basename in different directories are
          indistinguishable to this check.
        - Filtering is delegated to the collection's ``where`` clause; the
          comparison below re-validates each returned entry.
    """
    # Fail fast: nothing can be queried until a collection has been opened.
    if collection is None:
        raise RuntimeError("No database open. Call CreateDatabase() or OpenDatabase() first.")

    source_key = os.path.basename(filepath)
    matches = collection.get(where={"source": source_key})

    # Short-circuits on the first metadata entry that references this file.
    return any(entry["source"] == source_key for entry in matches["metadatas"])
|
ragpy-core/__init__.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
Created on Wed Jun 24 17:10:10 2026
|
|
4
|
+
|
|
5
|
+
@author: klusm
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .AzureOpenAIRelay import (
|
|
9
|
+
SetEmbeddingEndpointInfo,
|
|
10
|
+
SetCompletionEndpointInfo
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
from .RAGOrchestrator import (
|
|
14
|
+
IngestFile,
|
|
15
|
+
GenerateAnswer,
|
|
16
|
+
BuildContext,
|
|
17
|
+
BuildPrompt
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
from .VectorDatabase import (
|
|
21
|
+
OpenDatabase,
|
|
22
|
+
AddToDatabase,
|
|
23
|
+
CheckIfInDatabase
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
from .utils.CitationHelper import (
|
|
27
|
+
ResolveCitations,
|
|
28
|
+
BuildReferences
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
__all__ = [
|
|
32
|
+
"SetEmbeddingEndpointInfo",
|
|
33
|
+
"SetCompletionEndpointInfo",
|
|
34
|
+
"IngestFile",
|
|
35
|
+
"GenerateAnswer",
|
|
36
|
+
"BuildContext",
|
|
37
|
+
"BuildPrompt",
|
|
38
|
+
"OpenDatabase",
|
|
39
|
+
"AddToDatabase",
|
|
40
|
+
"CheckIfInDatabase",
|
|
41
|
+
"ResolveCitations",
|
|
42
|
+
"BuildReferences",
|
|
43
|
+
]
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ragpy-core
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: A modular Retrieval-Augmented Generation (RAG) pipeline for Python.
|
|
5
|
+
Author: William Klusman
|
|
6
|
+
Author-email: William Klusman <klusmannwilliam@gmail.com>
|
|
7
|
+
License: MIT License
|
|
8
|
+
|
|
9
|
+
Copyright (c) 2026 William Klusman
|
|
10
|
+
|
|
11
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12
|
+
of this software and associated documentation files (the “Software”), to deal
|
|
13
|
+
in the Software without restriction, including without limitation the rights
|
|
14
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
15
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
16
|
+
furnished to do so, subject to the following conditions:
|
|
17
|
+
|
|
18
|
+
The above copyright notice and this permission notice shall be included in
|
|
19
|
+
all copies or substantial portions of the Software.
|
|
20
|
+
|
|
21
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
22
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
23
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
24
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
25
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
26
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
27
|
+
DEALINGS IN THE SOFTWARE.
|
|
28
|
+
Keywords: RAG,retrieval-augmented-generation,LLM,vector-database,azure-openai,machine-learning,nlp
|
|
29
|
+
Classifier: Development Status :: 3 - Alpha
|
|
30
|
+
Classifier: Intended Audience :: Developers
|
|
31
|
+
Classifier: Intended Audience :: Science/Research
|
|
32
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
33
|
+
Classifier: Programming Language :: Python :: 3
|
|
34
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
35
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
37
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
38
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
39
|
+
Requires-Python: >=3.9
|
|
40
|
+
Description-Content-Type: text/markdown
|
|
41
|
+
License-File: LICENSE
|
|
42
|
+
Requires-Dist: chromadb>=0.4.0
|
|
43
|
+
Requires-Dist: numpy>=1.20
|
|
44
|
+
Requires-Dist: tiktoken>=0.5.0
|
|
45
|
+
Requires-Dist: requests>=2.0
|
|
46
|
+
Requires-Dist: openai>=1.0.0
|
|
47
|
+
Requires-Dist: pypdf>=4.0.0
|
|
48
|
+
Dynamic: author
|
|
49
|
+
Dynamic: license-file
|
|
50
|
+
Dynamic: requires-python
|
|
51
|
+
|
|
52
|
+
RAGpy
|
|
53
|
+
RAGpy is a lightweight, modular Retrieval-Augmented Generation (RAG) pipeline for Python. It provides a clear and testable architecture for document ingestion, chunking, embedding, retrieval, reranking, context compression, and grounded answer generation using Azure OpenAI and ChromaDB.
|
|
54
|
+
|
|
55
|
+
RAGpy is designed for developers who want a transparent, hackable RAG system without the complexity of large frameworks.
|
|
56
|
+
|
|
57
|
+
Features
|
|
58
|
+
Modular ingestion pipeline for text and PDF documents
|
|
59
|
+
|
|
60
|
+
Chunking and batching utilities for efficient embedding
|
|
61
|
+
|
|
62
|
+
Azure OpenAI embeddings and chat completions
|
|
63
|
+
|
|
64
|
+
ChromaDB vector database integration
|
|
65
|
+
|
|
66
|
+
LLM-based reranking for improved retrieval quality
|
|
67
|
+
|
|
68
|
+
Context compression to reduce token usage
|
|
69
|
+
|
|
70
|
+
Fully monkeypatch-friendly design for offline testing
|
|
71
|
+
|
|
72
|
+
Clean architecture suitable for extension and customization
|
|
73
|
+
|
|
74
|
+
Installation
|
|
75
|
+
Install from PyPI:
|
|
76
|
+
|
|
77
|
+
Code
|
|
78
|
+
pip install ragpy-core
|
|
79
|
+
For development:
|
|
80
|
+
|
|
81
|
+
Code
|
|
82
|
+
git clone https://github.com/yourusername/ragpy
|
|
83
|
+
cd ragpy
|
|
84
|
+
pip install -e .
|
|
85
|
+
Quickstart Example
|
|
86
|
+
python
|
|
87
|
+
from ragpy.RAGOrchestrator import IngestFile, GenerateAnswer
|
|
88
|
+
from ragpy.VectorDatabase import OpenDatabase
|
|
89
|
+
|
|
90
|
+
OpenDatabase("AeroDB", "./vectorDB")
|
|
91
|
+
IngestFile("engine_vibration.pdf", "AeroDB")
|
|
92
|
+
|
|
93
|
+
answer = GenerateAnswer("What causes engine vibration?", "AeroDB")
|
|
94
|
+
print(answer)
|
|
95
|
+
How RAGpy Works
|
|
96
|
+
1. Ingestion
|
|
97
|
+
Load text or PDF using FileLoader
|
|
98
|
+
|
|
99
|
+
Chunk text using TextChunker
|
|
100
|
+
|
|
101
|
+
Batch chunks using ChunkBatcher
|
|
102
|
+
|
|
103
|
+
Generate embeddings with Azure OpenAI
|
|
104
|
+
|
|
105
|
+
Store vectors and metadata in ChromaDB
|
|
106
|
+
|
|
107
|
+
2. Retrieval
|
|
108
|
+
Embed the user query
|
|
109
|
+
|
|
110
|
+
Retrieve top-K candidates from the vector database
|
|
111
|
+
|
|
112
|
+
3. Reranking
|
|
113
|
+
Use an LLM-based reranker to reorder retrieved chunks by relevance
|
|
114
|
+
|
|
115
|
+
4. Compression
|
|
116
|
+
Summarize top chunks into a compact context block
|
|
117
|
+
|
|
118
|
+
5. Answer Generation
|
|
119
|
+
Build a prompt using compressed context
|
|
120
|
+
|
|
121
|
+
Generate a grounded answer using Azure OpenAI
|
|
122
|
+
|
|
123
|
+
Project Structure
|
|
124
|
+
Code
|
|
125
|
+
ragpy/
|
|
126
|
+
AzureOpenAIRelay.py
|
|
127
|
+
RAGOrchestrator.py
|
|
128
|
+
VectorDatabase.py
|
|
129
|
+
Reranker.py
|
|
130
|
+
ChunkCompressor.py
|
|
131
|
+
loaders/
|
|
132
|
+
FileLoader.py
|
|
133
|
+
TextChunker.py
|
|
134
|
+
batching/
|
|
135
|
+
ChunkBatcher.py
|
|
136
|
+
tests/
|
|
137
|
+
docs/
|
|
138
|
+
Requirements
|
|
139
|
+
Python 3.9+
|
|
140
|
+
|
|
141
|
+
ChromaDB
|
|
142
|
+
|
|
143
|
+
numpy
|
|
144
|
+
|
|
145
|
+
tiktoken
|
|
146
|
+
|
|
147
|
+
pypdf
|
|
148
|
+
|
|
149
|
+
openai (Azure OpenAI SDK)
|
|
150
|
+
|
|
151
|
+
Testing
|
|
152
|
+
RAGpy includes a full pytest suite. All Azure calls are monkeypatch-friendly, allowing offline testing with mock LLMs.
|
|
153
|
+
|
|
154
|
+
Run tests:
|
|
155
|
+
|
|
156
|
+
Code
|
|
157
|
+
pytest -q
|
|
158
|
+
Contributing
|
|
159
|
+
Contributions are welcome.
|
|
160
|
+
Please open an issue or submit a pull request on GitHub.
|
|
161
|
+
|
|
162
|
+
Planned enhancements include:
|
|
163
|
+
|
|
164
|
+
Local embedding support (sentence-transformers)
|
|
165
|
+
|
|
166
|
+
Hybrid retrieval (vector + keyword)
|
|
167
|
+
|
|
168
|
+
Multimodal RAG (image + text)
|
|
169
|
+
|
|
170
|
+
Evaluation tools for relevance and faithfulness
|
|
171
|
+
|
|
172
|
+
Agentic RAG extensions
|
|
173
|
+
|
|
174
|
+
License
|
|
175
|
+
RAGpy is released under the MIT License.
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
ragpy-core/AzureOpenAIRelay.py,sha256=IqD_zkMnDlmMdyKfTEtJsYS3s7yXR71WrBewkYkHfVI,6147
|
|
2
|
+
ragpy-core/ChunkCompressor.py,sha256=iVAgW7Ku5FQx8ntjEregP8O2J0Qc9OFNxOv-iO5lX-0,2277
|
|
3
|
+
ragpy-core/DatabaseRetriever.py,sha256=jvbN9esU5RzQW98uA6qYE7SQvaGx87XGQdslZ9yIxu8,2793
|
|
4
|
+
ragpy-core/RAGOrchestrator.py,sha256=lR1GldVnF0xVGCfUijETYh4kdWHS8Zd_-FitxuIbV7k,6857
|
|
5
|
+
ragpy-core/Reranker.py,sha256=_AMvwtQp1xulBq_7iInXRuiian39tfkGxzNOLyCdCIg,4167
|
|
6
|
+
ragpy-core/VectorDatabase.py,sha256=xado-9gobVEs14tcJfFe7NmPVCT6Y3P168RVcit8w-0,5576
|
|
7
|
+
ragpy-core/__init__.py,sha256=Vih4zqBsj9rJKXSrdhdCAU9ryEBr-ZHCmbdjvrX_42k,769
|
|
8
|
+
ragpy_core-1.0.0.dist-info/licenses/LICENSE,sha256=N0pk1ZLa00VYhRM9xzwThPMdfL_8acMtFLhMZj_fSeA,1119
|
|
9
|
+
ragpy_core-1.0.0.dist-info/METADATA,sha256=HDopMelIJgs-WSSb4KHNjpSlExBvnBvrBnl4g7FSzyc,5313
|
|
10
|
+
ragpy_core-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
11
|
+
ragpy_core-1.0.0.dist-info/top_level.txt,sha256=2nLuavOLaI-fiobSkBx_CDqjD1kWlO2_2TCgOKT_rhY,11
|
|
12
|
+
ragpy_core-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 William Klusman
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the “Software”), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
DEALINGS IN THE SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
ragpy-core
|