rakam-systems-vectorstore 0.1.1rc7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. rakam_systems_vectorstore/MANIFEST.in +26 -0
  2. rakam_systems_vectorstore/README.md +1071 -0
  3. rakam_systems_vectorstore/__init__.py +93 -0
  4. rakam_systems_vectorstore/components/__init__.py +0 -0
  5. rakam_systems_vectorstore/components/chunker/__init__.py +19 -0
  6. rakam_systems_vectorstore/components/chunker/advanced_chunker.py +1019 -0
  7. rakam_systems_vectorstore/components/chunker/text_chunker.py +154 -0
  8. rakam_systems_vectorstore/components/embedding_model/__init__.py +0 -0
  9. rakam_systems_vectorstore/components/embedding_model/configurable_embeddings.py +546 -0
  10. rakam_systems_vectorstore/components/embedding_model/openai_embeddings.py +259 -0
  11. rakam_systems_vectorstore/components/loader/__init__.py +31 -0
  12. rakam_systems_vectorstore/components/loader/adaptive_loader.py +512 -0
  13. rakam_systems_vectorstore/components/loader/code_loader.py +699 -0
  14. rakam_systems_vectorstore/components/loader/doc_loader.py +812 -0
  15. rakam_systems_vectorstore/components/loader/eml_loader.py +556 -0
  16. rakam_systems_vectorstore/components/loader/html_loader.py +626 -0
  17. rakam_systems_vectorstore/components/loader/md_loader.py +622 -0
  18. rakam_systems_vectorstore/components/loader/odt_loader.py +750 -0
  19. rakam_systems_vectorstore/components/loader/pdf_loader.py +771 -0
  20. rakam_systems_vectorstore/components/loader/pdf_loader_light.py +723 -0
  21. rakam_systems_vectorstore/components/loader/tabular_loader.py +597 -0
  22. rakam_systems_vectorstore/components/vectorstore/__init__.py +0 -0
  23. rakam_systems_vectorstore/components/vectorstore/apps.py +10 -0
  24. rakam_systems_vectorstore/components/vectorstore/configurable_pg_vector_store.py +1661 -0
  25. rakam_systems_vectorstore/components/vectorstore/faiss_vector_store.py +878 -0
  26. rakam_systems_vectorstore/components/vectorstore/migrations/0001_initial.py +55 -0
  27. rakam_systems_vectorstore/components/vectorstore/migrations/__init__.py +0 -0
  28. rakam_systems_vectorstore/components/vectorstore/models.py +10 -0
  29. rakam_systems_vectorstore/components/vectorstore/pg_models.py +97 -0
  30. rakam_systems_vectorstore/components/vectorstore/pg_vector_store.py +827 -0
  31. rakam_systems_vectorstore/config.py +266 -0
  32. rakam_systems_vectorstore/core.py +8 -0
  33. rakam_systems_vectorstore/pyproject.toml +113 -0
  34. rakam_systems_vectorstore/server/README.md +290 -0
  35. rakam_systems_vectorstore/server/__init__.py +20 -0
  36. rakam_systems_vectorstore/server/mcp_server_vector.py +325 -0
  37. rakam_systems_vectorstore/setup.py +103 -0
  38. rakam_systems_vectorstore-0.1.1rc7.dist-info/METADATA +370 -0
  39. rakam_systems_vectorstore-0.1.1rc7.dist-info/RECORD +40 -0
  40. rakam_systems_vectorstore-0.1.1rc7.dist-info/WHEEL +4 -0
@@ -0,0 +1,325 @@
1
+ """
2
+ Vector Store MCP Server
3
+
4
+ This module provides an MCP (Model Context Protocol) server for vector store operations.
5
+ It creates a unified interface for vector search, storage, and management operations
6
+ that can be easily integrated with AI agents.
7
+
8
+ Features:
9
+ - Vector search with semantic similarity
10
+ - Document storage and indexing
11
+ - Collection management and info
12
+ - Async/await support for concurrent operations
13
+
14
+ Example:
15
+ >>> from rakam_systems_vectorstore.server.mcp_server_vector import run_vector_mcp
16
+ >>> from rakam_systems_vectorstore.components.vectorstore.configurable_pg_vector_store import ConfigurablePgVectorStore
17
+ >>>
18
+ >>> # Initialize vector store
19
+ >>> vector_store = ConfigurablePgVectorStore(name="my_store")
20
+ >>> vector_store.setup()
21
+ >>>
22
+ >>> # Create MCP server with vector store tools
23
+ >>> mcp_server = run_vector_mcp(vector_store)
24
+ >>>
25
+ >>> # Use the server to route messages
26
+ >>> result = await mcp_server.asend_message(
27
+ ... sender="agent",
28
+ ... receiver="vector_search",
29
+ ... message={'arguments': {'query': 'test', 'top_k': 5}}
30
+ ... )
31
+ """
32
+
33
+ from __future__ import annotations
34
+ from typing import Dict, Any, List, Optional
35
+
36
+ from rakam_systems_core.ai_utils import logging
37
+ from rakam_systems_core.ai_core.mcp.mcp_server import MCPServer
38
+ from rakam_systems_core.ai_core.interfaces import ToolComponent
39
+
40
+ logger = logging.getLogger(__name__)
41
+
42
+
43
class VectorSearchTool(ToolComponent):
    """Expose semantic search over a vector store as an MCP tool component."""

    def __init__(self, name: str, vector_store, config: Optional[Dict] = None):
        """
        Build the search tool around an existing vector store.

        Args:
            name: Name the tool is registered under.
            vector_store: Store object to query. It must provide a synchronous
                ``search(collection_name=..., query=..., number=..., **kwargs)``
                method (the module docs use ConfigurablePgVectorStore).
            config: Optional configuration dictionary; ``None`` is replaced
                by an empty dict before being handed to the base class.
        """
        super().__init__(name, config or {})
        self.vector_store = vector_store

    async def run(
        self,
        query: str,
        collection_name: str = "documents",
        top_k: int = 5,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Query the vector store and return the hits as plain dictionaries.

        Args:
            query: Search query text.
            collection_name: Collection to search in.
            top_k: Maximum number of results requested from the store
                (forwarded as ``number``).
            **kwargs: Extra keyword arguments forwarded untouched to the
                store's ``search`` method.

        Returns:
            On success, a dict with the keys ``query``, ``collection``,
            ``results_count`` and ``results``; each entry of ``results``
            holds ``content``, ``node_id``, ``source_file``, ``position``
            and ``metadata``.
            On failure, a dict with the keys ``error``, ``query`` and
            ``collection``. Exceptions are logged and never re-raised.
        """
        from asgiref.sync import sync_to_async

        try:
            # The store API is synchronous; asgiref's sync_to_async makes it
            # awaitable. Only the second element of the returned pair (the
            # node objects) is needed here.
            search_async = sync_to_async(self.vector_store.search)
            _, matched_nodes = await search_async(
                collection_name=collection_name,
                query=query,
                number=top_k,
                **kwargs
            )

            # Flatten each node into a plain dict
            hits = [
                {
                    'content': hit.content,
                    'node_id': hit.metadata.node_id,
                    'source_file': hit.metadata.source_file_uuid,
                    'position': hit.metadata.position,
                    'metadata': hit.metadata.custom or {},
                }
                for hit in matched_nodes
            ]

            return {
                'query': query,
                'collection': collection_name,
                'results_count': len(hits),
                'results': hits,
            }
        except Exception as exc:
            # Tool boundary: report the failure to the caller as data
            logger.error(f"Vector search failed: {str(exc)}", exc_info=True)
            return {
                'error': str(exc),
                'query': query,
                'collection': collection_name,
            }
116
+
117
+
118
class VectorStorageTool(ToolComponent):
    """Tool component that wraps raw texts in nodes and stores them in a vector store."""

    def __init__(self, name: str, vector_store, config: Optional[Dict] = None):
        """
        Initialize vector storage tool.

        Args:
            name: Tool name
            vector_store: Store object to write to. It must provide a
                synchronous ``create_collection_from_nodes(collection_name, nodes)``
                method (the module docs use ConfigurablePgVectorStore).
            config: Optional configuration dictionary; ``None`` becomes ``{}``
        """
        super().__init__(name, config or {})
        self.vector_store = vector_store

    async def run(
        self,
        documents: List[str],
        collection_name: str = "documents",
        doc_metadata: Optional[Dict] = None
    ) -> Dict[str, Any]:
        """
        Add documents to the vector store.

        Each text becomes one ``Node`` whose metadata carries the fixed source
        marker ``"mcp_upload"``, the text's index in ``documents`` as its
        position, and a private copy of ``doc_metadata``.

        Args:
            documents: List of document texts to add. A single bare string is
                treated as a one-element list.
            collection_name: Target collection name
            doc_metadata: Optional metadata to attach to every document. The
                dict is shallow-copied per node, so later mutation of one
                node's metadata (or of the caller's dict) does not leak into
                the others.

        Returns:
            On success, a dictionary containing:
                - success: ``True``
                - collection: Collection name
                - documents_added: Number of documents added
                - node_ids: ``metadata.node_id`` of every created node
            On failure, a dictionary containing ``success`` (``False``),
            ``error`` and ``collection``. Exceptions are logged and never
            re-raised.
        """
        from rakam_systems_vectorstore.core import Node, NodeMetadata
        from asgiref.sync import sync_to_async

        # Iterating a bare string would create one node per character.
        if isinstance(documents, str):
            documents = [documents]

        try:
            # Create nodes from documents; every node gets its own metadata
            # dict instead of sharing the caller's object.
            nodes = [
                Node(
                    content=doc_text,
                    metadata=NodeMetadata(
                        source_file_uuid="mcp_upload",
                        position=idx,
                        custom=dict(doc_metadata) if doc_metadata else {},
                    ),
                )
                for idx, doc_text in enumerate(documents)
            ]

            # Add to vector store (the store API is synchronous).
            # NOTE(review): whether create_collection_from_nodes appends to or
            # replaces an existing collection is not visible here - confirm.
            await sync_to_async(self.vector_store.create_collection_from_nodes)(
                collection_name, nodes
            )

            return {
                'success': True,
                'collection': collection_name,
                'documents_added': len(nodes),
                'node_ids': [node.metadata.node_id for node in nodes]
            }
        except Exception as e:
            # Tool boundary: report the failure to the caller as data
            logger.error(f"Vector storage failed: {str(e)}", exc_info=True)
            return {
                'success': False,
                'error': str(e),
                'collection': collection_name
            }
186
+
187
+
188
class VectorInfoTool(ToolComponent):
    """Report collection-level information about a vector store as an MCP tool."""

    def __init__(self, name: str, vector_store, config: Optional[Dict] = None):
        """
        Build the info tool around an existing vector store.

        Args:
            name: Name the tool is registered under.
            vector_store: Store object to inspect. It must provide synchronous
                ``get_collection_info(collection_name)`` and
                ``list_collections()`` methods (the module docs use
                ConfigurablePgVectorStore).
            config: Optional configuration dictionary; ``None`` is replaced
                by an empty dict before being handed to the base class.
        """
        super().__init__(name, config or {})
        self.vector_store = vector_store

    async def run(self, collection_name: Optional[str] = None) -> Dict[str, Any]:
        """
        Describe one collection, or list every collection.

        Args:
            collection_name: Collection to describe. When omitted (or empty),
                all collections are listed instead.

        Returns:
            - With a collection name: ``collection_name``, ``node_count`` and
              ``embedding_dim`` (the two numbers default to 0 when the store's
              info mapping lacks them).
            - Without one: ``total_collections`` and ``collections``.
            - On failure: a dict with the single key ``error``. Exceptions are
              logged and never re-raised.
        """
        from asgiref.sync import sync_to_async

        try:
            if not collection_name:
                # No specific collection requested: enumerate them all
                list_async = sync_to_async(self.vector_store.list_collections)
                names = await list_async()
                return {
                    'total_collections': len(names),
                    'collections': names,
                }

            # Details for the single requested collection
            describe_async = sync_to_async(self.vector_store.get_collection_info)
            details = await describe_async(collection_name)
            return {
                'collection_name': collection_name,
                'node_count': details.get('node_count', 0),
                'embedding_dim': details.get('embedding_dim', 0),
            }
        except Exception as exc:
            # Tool boundary: report the failure to the caller as data
            logger.error(
                f"Vector info retrieval failed: {str(exc)}", exc_info=True)
            return {'error': str(exc)}
241
+
242
+
243
def run_vector_mcp(
    vector_store,
    name: str = "vector_store_mcp",
    enable_logging: bool = False
) -> MCPServer:
    """
    Build an MCP server and register the three vector store tools on it.

    The server is created, its ``setup()`` is called, and these components are
    registered in this order:
    - vector_search: VectorSearchTool, semantic similarity search
    - vector_storage: VectorStorageTool, adds documents to the store
    - vector_info: VectorInfoTool, collection information

    The function only builds and returns the server; sending messages to the
    tools is left to the caller.

    Args:
        vector_store: Store object shared by all three tools (the module docs
            use a ConfigurablePgVectorStore on which ``setup()`` was called).
        name: Name for the MCP server (default: "vector_store_mcp")
        enable_logging: Forwarded to ``MCPServer`` (default: False)

    Returns:
        The ``MCPServer`` instance with the three tools registered.

    Example:
        >>> vector_store = ConfigurablePgVectorStore(name="my_store", config=config)
        >>> vector_store.setup()
        >>> mcp_server = run_vector_mcp(vector_store)
        >>> result = await mcp_server.asend_message(
        ...     sender="client",
        ...     receiver="vector_search",
        ...     message={'arguments': {'query': 'machine learning', 'top_k': 3}}
        ... )
    """
    logger.info(f"Creating vector store MCP server: {name}")

    mcp = MCPServer(name=name, enable_logging=enable_logging)
    mcp.setup()

    # Instantiate every tool first, then register them in the same order
    tools = [
        VectorSearchTool(name="vector_search", vector_store=vector_store),
        VectorStorageTool(name="vector_storage", vector_store=vector_store),
        VectorInfoTool(name="vector_info", vector_store=vector_store),
    ]
    for tool in tools:
        mcp.register_component(tool)

    tool_count = len(mcp)
    registered = mcp.list_components()
    logger.info(
        f"Vector MCP server '{name}' ready with {tool_count} tool(s): "
        f"{registered}"
    )

    return mcp
@@ -0,0 +1,103 @@
1
+ """
2
+ Setup file for ai_vectorstore standalone submodule.
3
+
4
+ This allows the ai_vectorstore to be installed independently or as part of rakam-systems.
5
+ """
6
+ from setuptools import setup, find_packages
7
+ from pathlib import Path
8
+
9
# Use the README next to this file as the long description when it exists
readme_path = Path(__file__).parent / "README.md"
long_description = (
    readme_path.read_text(encoding="utf-8") if readme_path.exists() else ""
)

# Optional dependency groups, one per feature area
feature_extras = {
    "postgres": [
        "psycopg2-binary>=2.9.9",
        "django>=4.0.0",
    ],
    "faiss": [
        "faiss-cpu>=1.12.0",
    ],
    "local-embeddings": [
        "sentence-transformers>=5.1.0",
        "torch>=2.0.0",
    ],
    "openai": [
        "openai>=1.0.0",
    ],
    "cohere": [
        "cohere>=4.0.0",
    ],
    "loaders": [
        "python-magic>=0.4.27",
        "beautifulsoup4>=4.12.0",
        "python-docx>=1.2.0",
        "pymupdf>=1.24.0",
        "pymupdf4llm>=0.0.17",
    ],
}

extras = {
    **feature_extras,
    # "all" is every feature group concatenated in declaration order
    "all": [
        requirement
        for group in feature_extras.values()
        for requirement in group
    ],
    # Development tooling is intentionally not part of "all"
    "dev": [
        "pytest>=7.0.0",
        "pytest-django>=4.5.0",
        "black>=23.0.0",
        "ruff>=0.1.0",
    ],
}

repository_url = "https://github.com/Rakam-AI/rakam_systems"

# NOTE(review): the name/version below differ from the wheel this file ships
# in (rakam-systems-vectorstore 0.1.1rc7), and a pyproject.toml is shipped
# alongside - confirm which metadata is authoritative.
setup(
    name="rakam-systems-ai-vectorstore",
    version="1.1.0",
    author="Mohamed Hilel, Peng Zheng",
    author_email="mohammedjassemhlel@gmail.com, pengzheng990630@outlook.com",
    description="Modular vector store and RAG components for semantic search and retrieval",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url=repository_url,
    project_urls={
        "Documentation": repository_url,
        "Source": repository_url,
        "Issues": repository_url + "/issues",
    },
    # Packages are discovered from the parent directory of this file
    packages=find_packages(
        where="..",
        include=["rakam_systems_vectorstore*", "rakam_systems_core.ai_core*"]
    ),
    package_dir={"": ".."},
    python_requires=">=3.10",
    install_requires=[
        "pyyaml>=6.0",
        "numpy>=1.24.0",
        "tqdm>=4.66.0",
    ],
    extras_require=extras,
    classifiers=[
        "Development Status :: 4 - Beta",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: Python :: 3.12",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Topic :: Software Development :: Libraries :: Python Modules",
    ],
    keywords="vector-store embeddings rag semantic-search pgvector faiss",
    include_package_data=True,
    package_data={
        "rakam_systems_vectorstore": ["**/*.yaml", "**/*.yml", "**/*.json", "**/*.md"],
    },
)