ragxo 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ragxo-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,30 @@
1
+ Metadata-Version: 2.1
2
+ Name: ragxo
3
+ Version: 0.1.0
4
+ Summary: A RAG (Retrieval-Augmented Generation) toolkit with Milvus integration
5
+ Home-page: https://github.com/yourusername/ragx
6
+ License: MIT
7
+ Keywords: rag,milvus,nlp,embeddings,openai
8
+ Author: Mohamed Sadek
9
+ Author-email: mohamedfawzydes@gmail.com
10
+ Requires-Python: >=3.11,<4.0
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Requires-Dist: dill (>=0.3.9,<0.4.0)
21
+ Requires-Dist: milvus (>=2.3.9,<3.0.0)
22
+ Requires-Dist: nltk (>=3.9.1,<4.0.0)
23
+ Requires-Dist: openai (>=1.61.1,<2.0.0)
24
+ Requires-Dist: pydantic (>=2.10.6,<3.0.0)
25
+ Requires-Dist: pymilvus (>=2.5.4,<3.0.0)
26
+ Requires-Dist: sentence-transformers (>=3.4.1,<4.0.0)
27
+ Project-URL: Repository, https://github.com/yourusername/ragx
28
+ Description-Content-Type: text/markdown
29
+
30
+
ragxo-0.1.0/README.md ADDED
File without changes
@@ -0,0 +1,53 @@
1
+ [tool.poetry]
2
+ name = "ragxo"
3
+ version = "0.1.0"
4
+ description = "A RAG (Retrieval-Augmented Generation) toolkit with Milvus integration"
5
+ authors = ["Mohamed Sadek <mohamedfawzydes@gmail.com>"]
6
+ readme = "README.md"
7
+ license = "MIT"
8
+ repository = "https://github.com/yourusername/ragx"
9
+ keywords = ["rag", "milvus", "nlp", "embeddings", "openai"]
10
+ classifiers = [
11
+ "Development Status :: 3 - Alpha",
12
+ "Intended Audience :: Developers",
13
+ "Topic :: Software Development :: Libraries :: Python Modules",
14
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
15
+ "License :: OSI Approved :: MIT License",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.11",
18
+ ]
19
+ packages = [{include = "ragxo"}]
20
+
21
+ [tool.poetry.dependencies]
22
+ python = "^3.11"
23
+ milvus = "^2.3.9"
24
+ pymilvus = "^2.5.4"
25
+ pydantic = "^2.10.6"
26
+ nltk = "^3.9.1"
27
+ sentence-transformers = "^3.4.1"
28
+ dill = "^0.3.9"
29
+ openai = "^1.61.1"
30
+
31
+ [tool.poetry.group.dev.dependencies]
32
+ pytest = "^8.3.4"
33
+ black = "^24.2.0"
34
+ isort = "^5.13.2"
35
+ flake8 = "^7.0.0"
36
+ mypy = "^1.9.0"
37
+
38
+ [build-system]
39
+ requires = ["poetry-core"]
40
+ build-backend = "poetry.core.masonry.api"
41
+
42
+ [tool.black]
43
+ line-length = 88
44
+ target-version = ['py311']
45
+
46
+ [tool.isort]
47
+ profile = "black"
48
+ multi_line_output = 3
49
+
50
+ [tool.mypy]
51
+ python_version = "3.11"
52
+ strict = true
53
+ ignore_missing_imports = true
File without changes
@@ -0,0 +1,141 @@
1
+ from typing import Self, Callable
2
+ from pymilvus import MilvusClient
3
+ from pydantic import BaseModel
4
+ import dill
5
+ import os
6
+ import shutil
7
+ import logging
8
+ import openai
9
+ from openai import ChatCompletion
10
+
11
# NOTE(review): basicConfig() configures the *root* logger, so importing this
# module switches the whole process to DEBUG-level logging when no handlers
# were installed earlier. Kept as-is because callers may rely on it.
logging.basicConfig(level=logging.DEBUG)

# Module-level logger used by the error paths of Ragx.export / Ragx.load.
logger = logging.getLogger(__name__)
13
+
14
class Document(BaseModel):
    """One indexable record: raw text, its metadata and an integer id."""

    # Raw text of the record; Ragx.index stores it under the "text" field.
    text: str
    # Arbitrary dictionary stored next to the vector under "metadata".
    metadata: dict
    # Integer written to the collection's "id" field.
    id: int
18
+
19
+ class Ragx:
20
+ def __init__(self, dimension: int) -> None:
21
+ self.dimension = dimension
22
+ self.collection_name = "ragx"
23
+ self.db_path = "milvus.db"
24
+ self.client = MilvusClient(self.db_path)
25
+ self.client.create_collection(self.collection_name, dimension=dimension)
26
+ self.processing_fn = []
27
+ self.embedding_fn = None
28
+ self.system_prompt = None
29
+ self.model = "gpt-4o-mini"
30
+
31
+ def add_preprocess(self, fn: Callable) -> Self:
32
+ self.processing_fn.append(fn)
33
+ return self
34
+
35
+ def add_embedding_fn(self, fn: Callable) -> Self:
36
+ if not fn:
37
+ raise ValueError("Embedding function cannot be None")
38
+ self.embedding_fn = fn
39
+ return self
40
+
41
+ def add_system_prompt(self, prompt: str) -> Self:
42
+ self.system_prompt = prompt
43
+ return self
44
+
45
+ def add_model(self, model: str) -> Self:
46
+ self.model = model
47
+ return self
48
+
49
+ def index(self, data: list[Document]) -> Self:
50
+ if not self.embedding_fn:
51
+ raise ValueError("Embedding function not set")
52
+
53
+ processed_text = []
54
+ for item in data:
55
+ current_text = item.text
56
+ for fn in self.processing_fn:
57
+ current_text = fn(current_text)
58
+ processed_text.append(current_text)
59
+
60
+ embeddings = [
61
+ self.embedding_fn(text)
62
+ for text in processed_text
63
+ ]
64
+
65
+ self.client.insert(self.collection_name, [
66
+ {
67
+ "text": item.text,
68
+ "metadata": item.metadata,
69
+ "id": item.id,
70
+ "vector": embedding
71
+ }
72
+ for item, embedding in zip(data, embeddings)
73
+ ])
74
+ return self
75
+
76
+ def query(self, query: str, output_fields: list[str] = ['text', 'metadata']) -> list[list[dict]]:
77
+ if not self.embedding_fn:
78
+ raise ValueError("Embedding function not set. Please call add_embedding_fn first.")
79
+
80
+ preprocessed_query = query
81
+ for fn in self.processing_fn:
82
+ preprocessed_query = fn(preprocessed_query)
83
+
84
+ embedding = self.embedding_fn(preprocessed_query)
85
+
86
+ return self.client.search(
87
+ collection_name=self.collection_name,
88
+ data=[embedding],
89
+ limit=10,
90
+ output_fields=output_fields
91
+ )
92
+
93
+ def export(self, folder_path: str) -> Self:
94
+ try:
95
+ os.makedirs(folder_path, exist_ok=True)
96
+
97
+ # Save using dill
98
+ pickle_path = os.path.join(folder_path, "ragx.pkl")
99
+ with open(pickle_path, "wb") as f:
100
+ dill.dump(self, f)
101
+
102
+ # Copy database
103
+ db_dest = os.path.join(folder_path, "milvus.db")
104
+ shutil.copy(self.db_path, db_dest)
105
+
106
+ return self
107
+
108
+ except Exception as e:
109
+ logger.error(f"Error in export: {e}")
110
+ raise
111
+
112
+ @classmethod
113
+ def load(cls, folder_path: str) -> 'Ragx':
114
+ try:
115
+ pickle_path = os.path.join(folder_path, "ragx.pkl")
116
+
117
+ with open(pickle_path, "rb") as f:
118
+ instance = dill.load(f)
119
+
120
+ # Restore client
121
+ instance.client = MilvusClient(os.path.join(folder_path, "milvus.db"))
122
+
123
+ return instance
124
+
125
+ except Exception as e:
126
+ logger.error(f"Error in load: {e}")
127
+ raise
128
+
129
+ def generate_llm_response(self, query: str) -> ChatCompletion:
130
+ if not self.system_prompt:
131
+ raise ValueError("System prompt not set. Please call add_system_prompt first.")
132
+
133
+ response = openai.chat.completions.create(
134
+ model=self.model,
135
+ messages=[
136
+ {"role": "system", "content": self.system_prompt},
137
+ {"role": "user", "content": query}
138
+ ]
139
+ )
140
+
141
+ return response