rakam_systems_vectorstore-0.1.1rc7-py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. rakam_systems_vectorstore/MANIFEST.in +26 -0
  2. rakam_systems_vectorstore/README.md +1071 -0
  3. rakam_systems_vectorstore/__init__.py +93 -0
  4. rakam_systems_vectorstore/components/__init__.py +0 -0
  5. rakam_systems_vectorstore/components/chunker/__init__.py +19 -0
  6. rakam_systems_vectorstore/components/chunker/advanced_chunker.py +1019 -0
  7. rakam_systems_vectorstore/components/chunker/text_chunker.py +154 -0
  8. rakam_systems_vectorstore/components/embedding_model/__init__.py +0 -0
  9. rakam_systems_vectorstore/components/embedding_model/configurable_embeddings.py +546 -0
  10. rakam_systems_vectorstore/components/embedding_model/openai_embeddings.py +259 -0
  11. rakam_systems_vectorstore/components/loader/__init__.py +31 -0
  12. rakam_systems_vectorstore/components/loader/adaptive_loader.py +512 -0
  13. rakam_systems_vectorstore/components/loader/code_loader.py +699 -0
  14. rakam_systems_vectorstore/components/loader/doc_loader.py +812 -0
  15. rakam_systems_vectorstore/components/loader/eml_loader.py +556 -0
  16. rakam_systems_vectorstore/components/loader/html_loader.py +626 -0
  17. rakam_systems_vectorstore/components/loader/md_loader.py +622 -0
  18. rakam_systems_vectorstore/components/loader/odt_loader.py +750 -0
  19. rakam_systems_vectorstore/components/loader/pdf_loader.py +771 -0
  20. rakam_systems_vectorstore/components/loader/pdf_loader_light.py +723 -0
  21. rakam_systems_vectorstore/components/loader/tabular_loader.py +597 -0
  22. rakam_systems_vectorstore/components/vectorstore/__init__.py +0 -0
  23. rakam_systems_vectorstore/components/vectorstore/apps.py +10 -0
  24. rakam_systems_vectorstore/components/vectorstore/configurable_pg_vector_store.py +1661 -0
  25. rakam_systems_vectorstore/components/vectorstore/faiss_vector_store.py +878 -0
  26. rakam_systems_vectorstore/components/vectorstore/migrations/0001_initial.py +55 -0
  27. rakam_systems_vectorstore/components/vectorstore/migrations/__init__.py +0 -0
  28. rakam_systems_vectorstore/components/vectorstore/models.py +10 -0
  29. rakam_systems_vectorstore/components/vectorstore/pg_models.py +97 -0
  30. rakam_systems_vectorstore/components/vectorstore/pg_vector_store.py +827 -0
  31. rakam_systems_vectorstore/config.py +266 -0
  32. rakam_systems_vectorstore/core.py +8 -0
  33. rakam_systems_vectorstore/pyproject.toml +113 -0
  34. rakam_systems_vectorstore/server/README.md +290 -0
  35. rakam_systems_vectorstore/server/__init__.py +20 -0
  36. rakam_systems_vectorstore/server/mcp_server_vector.py +325 -0
  37. rakam_systems_vectorstore/setup.py +103 -0
  38. rakam_systems_vectorstore-0.1.1rc7.dist-info/METADATA +370 -0
  39. rakam_systems_vectorstore-0.1.1rc7.dist-info/RECORD +40 -0
  40. rakam_systems_vectorstore-0.1.1rc7.dist-info/WHEEL +4 -0
rakam_systems_vectorstore/config.py
@@ -0,0 +1,266 @@
+ """
+ Configuration management for Vector Store components.
+
+ This module provides a unified configuration system that supports:
+ - YAML/JSON configuration files
+ - Environment variable overrides
+ - Programmatic configuration
+ - Validation and defaults
+ """
+
+ from __future__ import annotations
+
+ import os
+ import yaml
+ import json
+ from typing import Any, Dict, Optional, Union
+ from dataclasses import dataclass, field, asdict
+ from pathlib import Path
+
+
+ @dataclass
+ class EmbeddingConfig:
+     """Configuration for embedding models."""
+
+     model_type: str = "sentence_transformer"  # sentence_transformer, openai, cohere, etc.
+     model_name: str = "Snowflake/snowflake-arctic-embed-m"
+     api_key: Optional[str] = None
+     batch_size: int = 128
+     normalize: bool = True
+     dimensions: Optional[int] = None  # Auto-detected if None
+
+     def __post_init__(self):
+         # Load API key from environment if not provided
+         if self.model_type == "openai" and not self.api_key:
+             self.api_key = os.getenv("OPENAI_API_KEY")
+         elif self.model_type == "cohere" and not self.api_key:
+             self.api_key = os.getenv("COHERE_API_KEY")
+
+
+ @dataclass
+ class DatabaseConfig:
+     """Configuration for database connection."""
+
+     host: str = "localhost"
+     port: int = 5432
+     database: str = "vectorstore_db"
+     user: str = "postgres"
+     password: str = "postgres"
+     pool_size: int = 10
+     max_overflow: int = 20
+
+     def __post_init__(self):
+         # Load from environment variables if available
+         self.host = os.getenv("POSTGRES_HOST", self.host)
+         self.port = int(os.getenv("POSTGRES_PORT", str(self.port)))
+         self.database = os.getenv("POSTGRES_DB", self.database)
+         self.user = os.getenv("POSTGRES_USER", self.user)
+         self.password = os.getenv("POSTGRES_PASSWORD", self.password)
+
+     def to_connection_string(self) -> str:
+         """Generate PostgreSQL connection string."""
+         return f"postgresql://{self.user}:{self.password}@{self.host}:{self.port}/{self.database}"
+
+
+ @dataclass
+ class SearchConfig:
+     """Configuration for search operations."""
+
+     similarity_metric: str = "cosine"  # cosine, l2, dot_product
+     default_top_k: int = 5
+     enable_hybrid_search: bool = True
+     hybrid_alpha: float = 0.7  # Weight for vector similarity (1-alpha for keyword)
+     rerank: bool = True
+     search_buffer_factor: int = 2  # Retrieve more results for reranking
+
+     # Keyword search configuration
+     keyword_ranking_algorithm: str = "bm25"  # bm25 or ts_rank
+     bm25_k1: float = 1.5  # BM25 term frequency saturation parameter
+     bm25_b: float = 0.75  # BM25 length normalization parameter
+
+     def validate(self):
+         """Validate search configuration."""
+         if self.similarity_metric not in ["cosine", "l2", "dot_product", "dot"]:
+             raise ValueError(f"Invalid similarity metric: {self.similarity_metric}")
+         if not 0 <= self.hybrid_alpha <= 1:
+             raise ValueError(f"hybrid_alpha must be between 0 and 1, got {self.hybrid_alpha}")
+         if self.default_top_k < 1:
+             raise ValueError(f"default_top_k must be >= 1, got {self.default_top_k}")
+         if self.keyword_ranking_algorithm not in ["bm25", "ts_rank"]:
+             raise ValueError(f"Invalid keyword ranking algorithm: {self.keyword_ranking_algorithm}")
+         if self.bm25_k1 < 0:
+             raise ValueError(f"bm25_k1 must be >= 0, got {self.bm25_k1}")
+         if not 0 <= self.bm25_b <= 1:
+             raise ValueError(f"bm25_b must be between 0 and 1, got {self.bm25_b}")
+
+
+ @dataclass
+ class IndexConfig:
+     """Configuration for indexing operations."""
+
+     chunk_size: int = 512
+     chunk_overlap: int = 50
+     enable_parallel_processing: bool = False
+     parallel_workers: int = 4
+     batch_insert_size: int = 10000
+
+
+ @dataclass
+ class VectorStoreConfig:
+     """Master configuration for Vector Store component."""
+
+     name: str = "pg_vector_store"
+     embedding: EmbeddingConfig = field(default_factory=EmbeddingConfig)
+     database: DatabaseConfig = field(default_factory=DatabaseConfig)
+     search: SearchConfig = field(default_factory=SearchConfig)
+     index: IndexConfig = field(default_factory=IndexConfig)
+
+     # Component-specific settings
+     enable_caching: bool = True
+     cache_size: int = 1000
+     enable_logging: bool = True
+     log_level: str = "INFO"
+
+     @classmethod
+     def from_dict(cls, config_dict: Dict[str, Any]) -> "VectorStoreConfig":
+         """Create configuration from dictionary."""
+         # Extract nested configs
+         embedding_config = EmbeddingConfig(**config_dict.get("embedding", {}))
+         database_config = DatabaseConfig(**config_dict.get("database", {}))
+         search_config = SearchConfig(**config_dict.get("search", {}))
+         index_config = IndexConfig(**config_dict.get("index", {}))
+
+         # Create main config
+         main_config = {
+             "name": config_dict.get("name", "pg_vector_store"),
+             "embedding": embedding_config,
+             "database": database_config,
+             "search": search_config,
+             "index": index_config,
+             "enable_caching": config_dict.get("enable_caching", True),
+             "cache_size": config_dict.get("cache_size", 1000),
+             "enable_logging": config_dict.get("enable_logging", True),
+             "log_level": config_dict.get("log_level", "INFO"),
+         }
+
+         return cls(**main_config)
+
+     @classmethod
+     def from_yaml(cls, yaml_path: Union[str, Path]) -> "VectorStoreConfig":
+         """Load configuration from YAML file."""
+         path = Path(yaml_path)
+         if not path.exists():
+             raise FileNotFoundError(f"Configuration file not found: {yaml_path}")
+
+         with open(path, 'r') as f:
+             config_dict = yaml.safe_load(f)
+
+         return cls.from_dict(config_dict)
+
+     @classmethod
+     def from_json(cls, json_path: Union[str, Path]) -> "VectorStoreConfig":
+         """Load configuration from JSON file."""
+         path = Path(json_path)
+         if not path.exists():
+             raise FileNotFoundError(f"Configuration file not found: {json_path}")
+
+         with open(path, 'r') as f:
+             config_dict = json.load(f)
+
+         return cls.from_dict(config_dict)
+
+     def to_dict(self) -> Dict[str, Any]:
+         """Convert configuration to dictionary."""
+         return {
+             "name": self.name,
+             "embedding": asdict(self.embedding),
+             "database": asdict(self.database),
+             "search": asdict(self.search),
+             "index": asdict(self.index),
+             "enable_caching": self.enable_caching,
+             "cache_size": self.cache_size,
+             "enable_logging": self.enable_logging,
+             "log_level": self.log_level,
+         }
+
+     def save_yaml(self, output_path: Union[str, Path]) -> None:
+         """Save configuration to YAML file."""
+         with open(output_path, 'w') as f:
+             yaml.dump(self.to_dict(), f, default_flow_style=False, sort_keys=False)
+
+     def save_json(self, output_path: Union[str, Path]) -> None:
+         """Save configuration to JSON file."""
+         with open(output_path, 'w') as f:
+             json.dump(self.to_dict(), f, indent=2)
+
+     def validate(self) -> None:
+         """Validate all configuration settings."""
+         self.search.validate()
+
+         # Validate embedding config
+         if self.embedding.batch_size < 1:
+             raise ValueError(f"embedding.batch_size must be >= 1, got {self.embedding.batch_size}")
+
+         # Validate index config
+         if self.index.chunk_size < 1:
+             raise ValueError(f"index.chunk_size must be >= 1, got {self.index.chunk_size}")
+         if self.index.chunk_overlap < 0:
+             raise ValueError(f"index.chunk_overlap must be >= 0, got {self.index.chunk_overlap}")
+         if self.index.chunk_overlap >= self.index.chunk_size:
+             raise ValueError("index.chunk_overlap must be less than index.chunk_size")
+
+
+ def load_config(
+     config_source: Optional[Union[str, Path, Dict[str, Any]]] = None,
+     config_type: str = "auto"
+ ) -> VectorStoreConfig:
+     """
+     Load configuration from various sources.
+
+     Args:
+         config_source: Path to config file, dict, or None for defaults
+         config_type: Type of config file ('yaml', 'json', 'auto')
+
+     Returns:
+         VectorStoreConfig instance
+     """
+     if config_source is None:
+         # Return default configuration
+         return VectorStoreConfig()
+
+     if isinstance(config_source, dict):
+         # Load from dictionary
+         return VectorStoreConfig.from_dict(config_source)
+
+     # Load from file
+     path = Path(config_source)
+     if not path.exists():
+         raise FileNotFoundError(f"Configuration file not found: {config_source}")
+
+     if config_type == "auto":
+         # Auto-detect based on file extension
+         suffix = path.suffix.lower()
+         if suffix in ['.yaml', '.yml']:
+             config_type = 'yaml'
+         elif suffix == '.json':
+             config_type = 'json'
+         else:
+             raise ValueError(f"Cannot auto-detect config type for file: {config_source}")
+
+     if config_type == 'yaml':
+         return VectorStoreConfig.from_yaml(path)
+     elif config_type == 'json':
+         return VectorStoreConfig.from_json(path)
+     else:
+         raise ValueError(f"Unsupported config type: {config_type}")
+
+
+ __all__ = [
+     "EmbeddingConfig",
+     "DatabaseConfig",
+     "SearchConfig",
+     "IndexConfig",
+     "VectorStoreConfig",
+     "load_config",
+ ]
+
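A brief usage sketch of the configuration API added in config.py above. The dictionary keys mirror the nested dataclasses; the specific values and model name are illustrative, and any omitted keys fall back to the dataclass defaults plus the environment overrides (POSTGRES_*, OPENAI_API_KEY) applied in `__post_init__`:

```python
from rakam_systems_vectorstore.config import load_config

# Keys mirror EmbeddingConfig / SearchConfig / IndexConfig; omitted keys use
# the defaults and environment-variable overrides defined in config.py.
config = load_config({
    "name": "docs_store",
    "embedding": {"model_type": "openai", "model_name": "text-embedding-3-small"},
    "search": {"hybrid_alpha": 0.6, "default_top_k": 10},
    "index": {"chunk_size": 512, "chunk_overlap": 50},
})

config.validate()                              # raises ValueError on bad settings
print(config.database.to_connection_string())  # postgresql://user:pass@host:port/db
config.save_yaml("vectorstore.yaml")           # round-trips via load_config("vectorstore.yaml")
```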
rakam_systems_vectorstore/core.py
@@ -0,0 +1,8 @@
+ """
+ Re-export core data structures from rakam-systems-core for backward compatibility.
+ These classes are now maintained in rakam_systems_core.ai_core.vs_core
+ """
+
+ from rakam_systems_core.ai_core.vs_core import Node, NodeMetadata, VSFile
+
+ __all__ = ["Node", "NodeMetadata", "VSFile"]
rakam_systems_vectorstore/pyproject.toml
@@ -0,0 +1,113 @@
+ [project]
+ name = "rakam-systems-vectorstore"
+ version = "0.1.0"
+ description = "Modular vector store and RAG components for semantic search and retrieval"
+ readme = "README.md"
+ requires-python = ">=3.10"
+ license = {text = "Apache-2.0"}
+ authors = [
+     {name = "Mohamed Hilel", email = "mohammedjassemhlel@gmail.com"},
+     {name = "Peng Zheng", email = "pengzheng990630@outlook.com"}
+ ]
+ keywords = ["vector-store", "embeddings", "rag", "semantic-search", "pgvector", "faiss"]
+ classifiers = [
+     "License :: OSI Approved :: Apache Software License",
+     "Programming Language :: Python :: 3",
+     "Programming Language :: Python :: 3.10",
+     "Programming Language :: Python :: 3.11",
+     "Programming Language :: Python :: 3.12",
+     "Intended Audience :: Developers",
+     "Topic :: Scientific/Engineering :: Artificial Intelligence",
+     "Topic :: Software Development :: Libraries :: Python Modules",
+ ]
+
+ # Core dependencies required for base functionality
+ dependencies = [
+
+
+     "rakam-system-core",
+     "pyyaml>=6.0",
+     "numpy>=1.24.0",
+     "tqdm>=4.66.0",
+ ]
+
+ [project.optional-dependencies]
+ # PostgreSQL backend with Django ORM
+ postgres = [
+     "psycopg2-binary>=2.9.9",
+     "django>=4.0.0",
+ ]
+
+ # FAISS backend for in-memory vector search
+ faiss = [
+     "faiss-cpu>=1.12.0",
+ ]
+
+ # Local embedding models using SentenceTransformers
+ local-embeddings = [
+     "sentence-transformers>=5.1.0",
+     "torch>=2.0.0",
+ ]
+
+ # OpenAI embeddings
+ openai = [
+     "openai>=1.0.0",
+ ]
+
+ # Cohere embeddings
+ cohere = [
+     "cohere>=4.0.0",
+ ]
+
+ # Document loaders for various file types
+ loaders = [
+     "python-magic>=0.4.27",
+     "beautifulsoup4>=4.12.0",
+     "python-docx>=1.2.0",
+     "pymupdf>=1.24.0",
+     "pymupdf4llm>=0.0.17",
+     "docling==2.62.0",
+     "chonkie==1.4.2",
+     "odfpy==1.4.1",
+ ]
+
+ # Complete installation with all backends and features
+ all = [
+     "rakam-systems-vectorstore[postgres]",
+     "rakam-systems-vectorstore[faiss]",
+     "rakam-systems-vectorstore[local-embeddings]",
+     "rakam-systems-vectorstore[openai]",
+     "rakam-systems-vectorstore[cohere]",
+     "rakam-systems-vectorstore[loaders]",
+ ]
+
+ # Development dependencies
+ dev = [
+     "pytest>=7.0.0",
+     "pytest-django>=4.5.0",
+     "black>=23.0.0",
+     "ruff>=0.1.0",
+ ]
+
+ # ref: https://github.com/astral-sh/uv/issues/6371
+ [tool.uv.sources]
+ rakam-system-core = { workspace = true }
+
+ [project.urls]
+ Homepage = "https://github.com/Rakam-AI/rakam_systems"
+ Documentation = "https://github.com/Rakam-AI/rakam_systems"
+ Repository = "https://github.com/Rakam-AI/rakam_systems"
+ Issues = "https://github.com/Rakam-AI/rakam_systems/issues"
+
+
+ [build-system]
+ requires = ["hatchling"]
+ build-backend = "hatchling.build"
+
+
+ [tool.setuptools.packages.find]
+ where = [".."]
+ include = ["rakam_systems_vectorstore*", "rakam_systems_core*"]
+
+ [tool.setuptools.package-data]
+ "rakam_systems_vectorstore" = ["**/*.yaml", "**/*.yml", "**/*.json", "**/*.md"]
rakam_systems_vectorstore/server/README.md
@@ -0,0 +1,290 @@
+ # Vector Store MCP Server
+
+ This module provides an MCP (Model Context Protocol) server for vector store operations, enabling AI agents to interact with vector databases through a standardized interface.
+
+ ## Overview
+
+ The Vector Store MCP Server creates a message-based interface that wraps vector store operations into reusable tool components. This allows AI agents to:
+
+ - Search for documents using semantic similarity
+ - Add documents to the vector store
+ - Query collection information and metadata
+ - Perform concurrent operations efficiently
+
+ ## Components
+
+ ### 1. VectorSearchTool
+
+ Performs semantic search on vector collections.
+
+ **Parameters:**
+
+ - `query` (str): Search query text
+ - `collection_name` (str, optional): Target collection (default: "documents")
+ - `top_k` (int, optional): Number of results to return (default: 5)
+
+ **Returns:**
+
+ ```python
+ {
+     'query': str,
+     'collection': str,
+     'results_count': int,
+     'results': List[{
+         'content': str,
+         'node_id': str,
+         'source_file': str,
+         'position': int,
+         'metadata': dict
+     }]
+ }
+ ```
+
+ ### 2. VectorStorageTool
+
+ Adds documents to the vector store.
+
+ **Parameters:**
+
+ - `documents` (List[str]): List of document texts to add
+ - `collection_name` (str, optional): Target collection (default: "documents")
+ - `doc_metadata` (dict, optional): Metadata to attach to documents
+
+ **Returns:**
+
+ ```python
+ {
+     'success': bool,
+     'collection': str,
+     'documents_added': int,
+     'node_ids': List[str]
+ }
+ ```
+
+ ### 3. VectorInfoTool
+
+ Retrieves information about collections.
+
+ **Parameters:**
+
+ - `collection_name` (str, optional): Specific collection name (lists all if not provided)
+
+ **Returns:**
+
+ ```python
+ # For specific collection:
+ {
+     'collection_name': str,
+     'node_count': int,
+     'embedding_dim': int
+ }
+
+ # For all collections:
+ {
+     'total_collections': int,
+     'collections': List[str]
+ }
+ ```
+
+ ## Usage
+
+ ### Basic Setup
+
+ ```python
+ from rakam_systems_vectorstore.server import run_vector_mcp
+ from rakam_systems_vectorstore.components.vectorstore.configurable_pg_vector_store import ConfigurablePgVectorStore
+ from rakam_systems_vectorstore.config import VectorStoreConfig, EmbeddingConfig
+
+ # Initialize vector store
+ config = VectorStoreConfig(
+     name="my_store",
+     embedding=EmbeddingConfig(
+         model_type="sentence_transformer",
+         model_name="Snowflake/snowflake-arctic-embed-m"
+     )
+ )
+
+ vector_store = ConfigurablePgVectorStore(name="my_store", config=config)
+ vector_store.setup()
+
+ # Create MCP server
+ mcp_server = run_vector_mcp(vector_store)
+
+ # List available tools
+ print(mcp_server.list_components())
+ # Output: ['vector_info', 'vector_search', 'vector_storage']
+ ```
+
+ ### Using with Async Messages
+
+ ```python
+ # Search for documents
+ result = await mcp_server.asend_message(
+     sender="agent",
+     receiver="vector_search",
+     message={
+         'arguments': {
+             'query': 'machine learning',
+             'collection_name': 'docs',
+             'top_k': 5
+         }
+     }
+ )
+
+ # Add documents
+ result = await mcp_server.asend_message(
+     sender="agent",
+     receiver="vector_storage",
+     message={
+         'arguments': {
+             'documents': ['Doc 1 text', 'Doc 2 text'],
+             'collection_name': 'docs'
+         }
+     }
+ )
+
+ # Get collection info
+ result = await mcp_server.asend_message(
+     sender="agent",
+     receiver="vector_info",
+     message={
+         'arguments': {
+             'collection_name': 'docs'
+         }
+     }
+ )
+ ```
+
+ ### Integration with Tool Registry
+
+ ```python
+ from rakam_systems_core.ai_core.interfaces import ToolRegistry, ToolInvoker
+
+ # Create registry and invoker
+ registry = ToolRegistry()
+ invoker = ToolInvoker(registry)
+
+ # Register MCP server
+ invoker.register_mcp_server("vector_store_mcp", mcp_server)
+
+ # Register tools
+ registry.register_mcp_tool(
+     name="search_docs",
+     mcp_server="vector_store_mcp",
+     mcp_tool_name="vector_search",
+     description="Search documents using semantic similarity",
+     category="vector_store"
+ )
+
+ # Use through invoker
+ results = await invoker.ainvoke(
+     "search_docs",
+     query="What is AI?",
+     top_k=3
+ )
+ ```
+
+ ### Concurrent Operations
+
+ ```python
+ import asyncio
+
+ # Execute multiple operations concurrently
+ results = await asyncio.gather(
+     mcp_server.asend_message(
+         sender="agent",
+         receiver="vector_search",
+         message={'arguments': {'query': 'machine learning', 'top_k': 3}}
+     ),
+     mcp_server.asend_message(
+         sender="agent",
+         receiver="vector_search",
+         message={'arguments': {'query': 'deep learning', 'top_k': 3}}
+     ),
+     mcp_server.asend_message(
+         sender="agent",
+         receiver="vector_info",
+         message={'arguments': {}}
+     )
+ )
+ ```
+
+ ## Function Reference
+
+ ### run_vector_mcp()
+
+ Creates and configures an MCP server with vector store tools.
+
+ ```python
+ def run_vector_mcp(
+     vector_store,
+     name: str = "vector_store_mcp",
+     enable_logging: bool = False
+ ) -> MCPServer
+ ```
+
+ **Parameters:**
+
+ - `vector_store`: ConfigurablePgVectorStore instance (must be set up)
+ - `name` (str, optional): Name for the MCP server (default: "vector_store_mcp")
+ - `enable_logging` (bool, optional): Enable detailed MCP logging (default: False)
+
+ **Returns:**
+
+ - `MCPServer`: Configured MCP server instance with registered tools
+
+ ## Examples
+
+ See the comprehensive examples in:
+
+ - `/examples/mcp_vector_search_example.py` - Full MCP vector search demonstration
+ - `/docs/MCP_VECTOR_STORE_GUIDE.md` - Complete guide to MCP with vector stores
+
+ ## Architecture
+
+ The MCP server follows a modular architecture:
+
+ ```
+ ┌─────────────────────────────────────────┐
+ │            AI Agent / Client            │
+ └────────────────────┬────────────────────┘
+                      │
+                      │ send_message / asend_message
+                      ▼
+ ┌─────────────────────────────────────────┐
+ │                MCPServer                │
+ │  (Message Router & Component Registry)  │
+ └────────────────────┬────────────────────┘
+                      │
+        ┌─────────────┼─────────────┐
+        ▼             ▼             ▼
+  ┌───────────┐ ┌───────────┐ ┌───────────┐
+  │  Vector   │ │  Vector   │ │  Vector   │
+  │  Search   │ │  Storage  │ │   Info    │
+  │   Tool    │ │   Tool    │ │   Tool    │
+  └─────┬─────┘ └─────┬─────┘ └─────┬─────┘
+        │             │             │
+        └─────────────┼─────────────┘
+                      │
+                      ▼
+         ┌──────────────────────────┐
+         │   ConfigurablePgVector   │
+         │          Store           │
+         └──────────────────────────┘
+ ```
+
+ ## Benefits
+
+ 1. **Decoupled Architecture**: Tools don't need direct dependencies on the vector store
+ 2. **Standardized Interface**: All tools work the same way through MCP
+ 3. **Easy Extension**: Add new tools without modifying existing code
+ 4. **Tool Discovery**: Agents can query available tools dynamically
+ 5. **Async Support**: Efficient concurrent operations
+ 6. **Integration Ready**: Works seamlessly with BaseAgent and other AI components
+
+ ## Related Documentation
+
+ - [MCP Server API](/app/rakam_systems/rakam_systems/ai_core/mcp/README.md)
+ - [MCP Vector Store Guide](/docs/MCP_VECTOR_STORE_GUIDE.md)
+ - [MCP Quickstart](/docs/MCP_QUICKSTART.md)
+ - [Vector Search Architecture](/docs/VECTOR_SEARCH_ARCHITECTURE.md)
1
+ """
2
+ Vector Store MCP Server Module
3
+
4
+ This module provides MCP (Model Context Protocol) server functionality for vector store operations.
5
+ """
6
+
7
+ from .mcp_server_vector import (
8
+ run_vector_mcp,
9
+ VectorSearchTool,
10
+ VectorStorageTool,
11
+ VectorInfoTool,
12
+ )
13
+
14
+ __all__ = [
15
+ "run_vector_mcp",
16
+ "VectorSearchTool",
17
+ "VectorStorageTool",
18
+ "VectorInfoTool",
19
+ ]
20
+