ragxo 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragxo-0.1.0/PKG-INFO +30 -0
- ragxo-0.1.0/README.md +0 -0
- ragxo-0.1.0/pyproject.toml +53 -0
- ragxo-0.1.0/ragxo/__init__.py +0 -0
- ragxo-0.1.0/ragxo/ragx.py +141 -0
ragxo-0.1.0/PKG-INFO
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: ragxo
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: A RAG (Retrieval-Augmented Generation) toolkit with Milvus integration
|
5
|
+
Home-page: https://github.com/yourusername/ragx
|
6
|
+
License: MIT
|
7
|
+
Keywords: rag,milvus,nlp,embeddings,openai
|
8
|
+
Author: Mohamed Sadek
|
9
|
+
Author-email: mohamedfawzydes@gmail.com
|
10
|
+
Requires-Python: >=3.11,<4.0
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
12
|
+
Classifier: Intended Audience :: Developers
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
20
|
+
Requires-Dist: dill (>=0.3.9,<0.4.0)
|
21
|
+
Requires-Dist: milvus (>=2.3.9,<3.0.0)
|
22
|
+
Requires-Dist: nltk (>=3.9.1,<4.0.0)
|
23
|
+
Requires-Dist: openai (>=1.61.1,<2.0.0)
|
24
|
+
Requires-Dist: pydantic (>=2.10.6,<3.0.0)
|
25
|
+
Requires-Dist: pymilvus (>=2.5.4,<3.0.0)
|
26
|
+
Requires-Dist: sentence-transformers (>=3.4.1,<4.0.0)
|
27
|
+
Project-URL: Repository, https://github.com/yourusername/ragx
|
28
|
+
Description-Content-Type: text/markdown
|
29
|
+
|
30
|
+
|
ragxo-0.1.0/README.md
ADDED
File without changes
|
@@ -0,0 +1,53 @@
|
|
1
|
+
[tool.poetry]
|
2
|
+
name = "ragxo"
|
3
|
+
version = "0.1.0"
|
4
|
+
description = "A RAG (Retrieval-Augmented Generation) toolkit with Milvus integration"
|
5
|
+
authors = ["Mohamed Sadek <mohamedfawzydes@gmail.com>"]
|
6
|
+
readme = "README.md"
|
7
|
+
license = "MIT"
|
8
|
+
repository = "https://github.com/yourusername/ragx"
|
9
|
+
keywords = ["rag", "milvus", "nlp", "embeddings", "openai"]
|
10
|
+
classifiers = [
|
11
|
+
"Development Status :: 3 - Alpha",
|
12
|
+
"Intended Audience :: Developers",
|
13
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
14
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
15
|
+
"License :: OSI Approved :: MIT License",
|
16
|
+
"Programming Language :: Python :: 3",
|
17
|
+
"Programming Language :: Python :: 3.11",
|
18
|
+
]
|
19
|
+
packages = [{include = "ragxo"}]
|
20
|
+
|
21
|
+
[tool.poetry.dependencies]
|
22
|
+
python = "^3.11"
|
23
|
+
milvus = "^2.3.9"
|
24
|
+
pymilvus = "^2.5.4"
|
25
|
+
pydantic = "^2.10.6"
|
26
|
+
nltk = "^3.9.1"
|
27
|
+
sentence-transformers = "^3.4.1"
|
28
|
+
dill = "^0.3.9"
|
29
|
+
openai = "^1.61.1"
|
30
|
+
|
31
|
+
[tool.poetry.group.dev.dependencies]
|
32
|
+
pytest = "^8.3.4"
|
33
|
+
black = "^24.2.0"
|
34
|
+
isort = "^5.13.2"
|
35
|
+
flake8 = "^7.0.0"
|
36
|
+
mypy = "^1.9.0"
|
37
|
+
|
38
|
+
[build-system]
|
39
|
+
requires = ["poetry-core"]
|
40
|
+
build-backend = "poetry.core.masonry.api"
|
41
|
+
|
42
|
+
[tool.black]
|
43
|
+
line-length = 88
|
44
|
+
target-version = ['py311']
|
45
|
+
|
46
|
+
[tool.isort]
|
47
|
+
profile = "black"
|
48
|
+
multi_line_output = 3
|
49
|
+
|
50
|
+
[tool.mypy]
|
51
|
+
python_version = "3.11"
|
52
|
+
strict = true
|
53
|
+
ignore_missing_imports = true
|
File without changes
|
@@ -0,0 +1,141 @@
|
|
1
|
+
from typing import Self, Callable
|
2
|
+
from pymilvus import MilvusClient
|
3
|
+
from pydantic import BaseModel
|
4
|
+
import dill
|
5
|
+
import os
|
6
|
+
import shutil
|
7
|
+
import logging
|
8
|
+
import openai
|
9
|
+
from openai import ChatCompletion
|
10
|
+
|
11
|
+
# Module-level logger. Deliberately no logging.basicConfig() here: a library
# must not configure the root logger (and certainly not force DEBUG level);
# log handling/level is left to the consuming application.
logger = logging.getLogger(__name__)
|
13
|
+
|
14
|
+
class Document(BaseModel):
    """A single retrievable unit: raw text plus caller-supplied metadata.

    Instances are what Ragx.index stores in Milvus: the embedding is computed
    from (preprocessed) `text`, while `text`, `metadata`, and `id` are kept
    alongside the vector as output fields.
    """
    text: str       # raw document text; preprocessing is applied at index time
    metadata: dict  # arbitrary user metadata returned with search hits
    id: int         # caller-assigned unique integer primary key
|
18
|
+
|
19
|
+
class Ragx:
|
20
|
+
def __init__(self, dimension: int) -> None:
|
21
|
+
self.dimension = dimension
|
22
|
+
self.collection_name = "ragx"
|
23
|
+
self.db_path = "milvus.db"
|
24
|
+
self.client = MilvusClient(self.db_path)
|
25
|
+
self.client.create_collection(self.collection_name, dimension=dimension)
|
26
|
+
self.processing_fn = []
|
27
|
+
self.embedding_fn = None
|
28
|
+
self.system_prompt = None
|
29
|
+
self.model = "gpt-4o-mini"
|
30
|
+
|
31
|
+
def add_preprocess(self, fn: Callable) -> Self:
|
32
|
+
self.processing_fn.append(fn)
|
33
|
+
return self
|
34
|
+
|
35
|
+
def add_embedding_fn(self, fn: Callable) -> Self:
|
36
|
+
if not fn:
|
37
|
+
raise ValueError("Embedding function cannot be None")
|
38
|
+
self.embedding_fn = fn
|
39
|
+
return self
|
40
|
+
|
41
|
+
def add_system_prompt(self, prompt: str) -> Self:
|
42
|
+
self.system_prompt = prompt
|
43
|
+
return self
|
44
|
+
|
45
|
+
def add_model(self, model: str) -> Self:
|
46
|
+
self.model = model
|
47
|
+
return self
|
48
|
+
|
49
|
+
def index(self, data: list[Document]) -> Self:
|
50
|
+
if not self.embedding_fn:
|
51
|
+
raise ValueError("Embedding function not set")
|
52
|
+
|
53
|
+
processed_text = []
|
54
|
+
for item in data:
|
55
|
+
current_text = item.text
|
56
|
+
for fn in self.processing_fn:
|
57
|
+
current_text = fn(current_text)
|
58
|
+
processed_text.append(current_text)
|
59
|
+
|
60
|
+
embeddings = [
|
61
|
+
self.embedding_fn(text)
|
62
|
+
for text in processed_text
|
63
|
+
]
|
64
|
+
|
65
|
+
self.client.insert(self.collection_name, [
|
66
|
+
{
|
67
|
+
"text": item.text,
|
68
|
+
"metadata": item.metadata,
|
69
|
+
"id": item.id,
|
70
|
+
"vector": embedding
|
71
|
+
}
|
72
|
+
for item, embedding in zip(data, embeddings)
|
73
|
+
])
|
74
|
+
return self
|
75
|
+
|
76
|
+
def query(self, query: str, output_fields: list[str] = ['text', 'metadata']) -> list[list[dict]]:
|
77
|
+
if not self.embedding_fn:
|
78
|
+
raise ValueError("Embedding function not set. Please call add_embedding_fn first.")
|
79
|
+
|
80
|
+
preprocessed_query = query
|
81
|
+
for fn in self.processing_fn:
|
82
|
+
preprocessed_query = fn(preprocessed_query)
|
83
|
+
|
84
|
+
embedding = self.embedding_fn(preprocessed_query)
|
85
|
+
|
86
|
+
return self.client.search(
|
87
|
+
collection_name=self.collection_name,
|
88
|
+
data=[embedding],
|
89
|
+
limit=10,
|
90
|
+
output_fields=output_fields
|
91
|
+
)
|
92
|
+
|
93
|
+
def export(self, folder_path: str) -> Self:
|
94
|
+
try:
|
95
|
+
os.makedirs(folder_path, exist_ok=True)
|
96
|
+
|
97
|
+
# Save using dill
|
98
|
+
pickle_path = os.path.join(folder_path, "ragx.pkl")
|
99
|
+
with open(pickle_path, "wb") as f:
|
100
|
+
dill.dump(self, f)
|
101
|
+
|
102
|
+
# Copy database
|
103
|
+
db_dest = os.path.join(folder_path, "milvus.db")
|
104
|
+
shutil.copy(self.db_path, db_dest)
|
105
|
+
|
106
|
+
return self
|
107
|
+
|
108
|
+
except Exception as e:
|
109
|
+
logger.error(f"Error in export: {e}")
|
110
|
+
raise
|
111
|
+
|
112
|
+
@classmethod
|
113
|
+
def load(cls, folder_path: str) -> 'Ragx':
|
114
|
+
try:
|
115
|
+
pickle_path = os.path.join(folder_path, "ragx.pkl")
|
116
|
+
|
117
|
+
with open(pickle_path, "rb") as f:
|
118
|
+
instance = dill.load(f)
|
119
|
+
|
120
|
+
# Restore client
|
121
|
+
instance.client = MilvusClient(os.path.join(folder_path, "milvus.db"))
|
122
|
+
|
123
|
+
return instance
|
124
|
+
|
125
|
+
except Exception as e:
|
126
|
+
logger.error(f"Error in load: {e}")
|
127
|
+
raise
|
128
|
+
|
129
|
+
def generate_llm_response(self, query: str) -> ChatCompletion:
|
130
|
+
if not self.system_prompt:
|
131
|
+
raise ValueError("System prompt not set. Please call add_system_prompt first.")
|
132
|
+
|
133
|
+
response = openai.chat.completions.create(
|
134
|
+
model=self.model,
|
135
|
+
messages=[
|
136
|
+
{"role": "system", "content": self.system_prompt},
|
137
|
+
{"role": "user", "content": query}
|
138
|
+
]
|
139
|
+
)
|
140
|
+
|
141
|
+
return response
|