rakam-systems-vectorstore 0.1.1rc7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rakam_systems_vectorstore-0.1.1rc7/.gitignore +46 -0
- rakam_systems_vectorstore-0.1.1rc7/.python-version +1 -0
- rakam_systems_vectorstore-0.1.1rc7/PKG-INFO +370 -0
- rakam_systems_vectorstore-0.1.1rc7/README.md +301 -0
- rakam_systems_vectorstore-0.1.1rc7/main.py +6 -0
- rakam_systems_vectorstore-0.1.1rc7/pyproject.toml +111 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/MANIFEST.in +26 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/README.md +1071 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/__init__.py +93 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/__init__.py +0 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/chunker/__init__.py +19 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/chunker/advanced_chunker.py +1019 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/chunker/text_chunker.py +154 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/embedding_model/__init__.py +0 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/embedding_model/configurable_embeddings.py +546 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/embedding_model/openai_embeddings.py +259 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/loader/__init__.py +31 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/loader/adaptive_loader.py +512 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/loader/code_loader.py +699 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/loader/doc_loader.py +812 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/loader/eml_loader.py +556 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/loader/html_loader.py +626 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/loader/md_loader.py +622 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/loader/odt_loader.py +750 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/loader/pdf_loader.py +771 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/loader/pdf_loader_light.py +723 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/loader/tabular_loader.py +597 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/vectorstore/__init__.py +0 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/vectorstore/apps.py +10 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/vectorstore/configurable_pg_vector_store.py +1661 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/vectorstore/faiss_vector_store.py +878 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/vectorstore/migrations/0001_initial.py +55 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/vectorstore/migrations/__init__.py +0 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/vectorstore/models.py +10 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/vectorstore/pg_models.py +97 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/components/vectorstore/pg_vector_store.py +827 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/config.py +266 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/core.py +8 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/pyproject.toml +113 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/server/README.md +290 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/server/__init__.py +20 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/server/mcp_server_vector.py +325 -0
- rakam_systems_vectorstore-0.1.1rc7/src/rakam_systems_vectorstore/setup.py +103 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Python specific
|
|
2
|
+
*.pyc
|
|
3
|
+
*.pyo
|
|
4
|
+
*.pyd
|
|
5
|
+
__pycache__/
|
|
6
|
+
.pytest_cache/
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Environments
|
|
10
|
+
.env
|
|
11
|
+
*.env
|
|
12
|
+
*.venv*
|
|
13
|
+
venv/
|
|
14
|
+
*venv/
|
|
15
|
+
ENV/
|
|
16
|
+
env/
|
|
17
|
+
env.bak/
|
|
18
|
+
venv.bak/
|
|
19
|
+
|
|
20
|
+
# VS Code
|
|
21
|
+
.vscode/
|
|
22
|
+
.vscode/*
|
|
23
|
+
|
|
24
|
+
# PyCharm
|
|
25
|
+
.idea/
|
|
26
|
+
.idea/*
|
|
27
|
+
|
|
28
|
+
# OS specific
|
|
29
|
+
.DS_Store
|
|
30
|
+
Thumbs.db
|
|
31
|
+
|
|
32
|
+
#data
|
|
33
|
+
data/
|
|
34
|
+
dist/
|
|
35
|
+
logs/
|
|
36
|
+
|
|
37
|
+
# Build artifacts
|
|
38
|
+
*.egg-info/
|
|
39
|
+
|
|
40
|
+
# tracking data
|
|
41
|
+
agent_tracking/
|
|
42
|
+
|
|
43
|
+
# docs
|
|
44
|
+
docs/
|
|
45
|
+
|
|
46
|
+
temp_path/
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.11
|
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: rakam-systems-vectorstore
|
|
3
|
+
Version: 0.1.1rc7
|
|
4
|
+
Summary: Utility package for interacting with vectorstores
|
|
5
|
+
Project-URL: Homepage, https://github.com/Rakam-AI/rakam_systems-inhouse
|
|
6
|
+
Project-URL: Documentation, https://github.com/Rakam-AI/rakam_systems-inhouse
|
|
7
|
+
Project-URL: Repository, https://github.com/Rakam-AI/rakam_systems-inhouse
|
|
8
|
+
Project-URL: Issues, https://github.com/Rakam-AI/rakam_systems-inhouse/issues
|
|
9
|
+
Author-email: Mohamed Hilel <mohammedjassemhlel@gmail.com>, Peng Zheng <pengzheng990630@outlook.com>
|
|
10
|
+
Keywords: embeddings,faiss,pgvector,rag,semantic-search,vector-store
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Requires-Dist: numpy>=1.24.0
|
|
21
|
+
Requires-Dist: pyyaml>=6.0
|
|
22
|
+
Requires-Dist: rakam-system-core
|
|
23
|
+
Requires-Dist: tqdm>=4.66.0
|
|
24
|
+
Provides-Extra: all
|
|
25
|
+
Requires-Dist: beautifulsoup4>=4.12.0; extra == 'all'
|
|
26
|
+
Requires-Dist: chonkie==1.4.2; extra == 'all'
|
|
27
|
+
Requires-Dist: cohere>=4.0.0; extra == 'all'
|
|
28
|
+
Requires-Dist: django>=4.0.0; extra == 'all'
|
|
29
|
+
Requires-Dist: docling==2.62.0; extra == 'all'
|
|
30
|
+
Requires-Dist: faiss-cpu>=1.12.0; extra == 'all'
|
|
31
|
+
Requires-Dist: odfpy==1.4.1; extra == 'all'
|
|
32
|
+
Requires-Dist: openai>=1.0.0; extra == 'all'
|
|
33
|
+
Requires-Dist: pgvector; extra == 'all'
|
|
34
|
+
Requires-Dist: psycopg2-binary>=2.9.9; extra == 'all'
|
|
35
|
+
Requires-Dist: pymupdf4llm>=0.0.17; extra == 'all'
|
|
36
|
+
Requires-Dist: pymupdf>=1.24.0; extra == 'all'
|
|
37
|
+
Requires-Dist: python-docx>=1.2.0; extra == 'all'
|
|
38
|
+
Requires-Dist: python-magic>=0.4.27; extra == 'all'
|
|
39
|
+
Requires-Dist: sentence-transformers>=5.1.0; extra == 'all'
|
|
40
|
+
Requires-Dist: torch>=2.0.0; extra == 'all'
|
|
41
|
+
Provides-Extra: cohere
|
|
42
|
+
Requires-Dist: cohere>=4.0.0; extra == 'cohere'
|
|
43
|
+
Provides-Extra: dev
|
|
44
|
+
Requires-Dist: black>=23.0.0; extra == 'dev'
|
|
45
|
+
Requires-Dist: pytest-django>=4.5.0; extra == 'dev'
|
|
46
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
47
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
48
|
+
Provides-Extra: faiss
|
|
49
|
+
Requires-Dist: faiss-cpu>=1.12.0; extra == 'faiss'
|
|
50
|
+
Provides-Extra: loaders
|
|
51
|
+
Requires-Dist: beautifulsoup4>=4.12.0; extra == 'loaders'
|
|
52
|
+
Requires-Dist: chonkie==1.4.2; extra == 'loaders'
|
|
53
|
+
Requires-Dist: docling==2.62.0; extra == 'loaders'
|
|
54
|
+
Requires-Dist: odfpy==1.4.1; extra == 'loaders'
|
|
55
|
+
Requires-Dist: pymupdf4llm>=0.0.17; extra == 'loaders'
|
|
56
|
+
Requires-Dist: pymupdf>=1.24.0; extra == 'loaders'
|
|
57
|
+
Requires-Dist: python-docx>=1.2.0; extra == 'loaders'
|
|
58
|
+
Requires-Dist: python-magic>=0.4.27; extra == 'loaders'
|
|
59
|
+
Provides-Extra: local-embeddings
|
|
60
|
+
Requires-Dist: sentence-transformers>=5.1.0; extra == 'local-embeddings'
|
|
61
|
+
Requires-Dist: torch>=2.0.0; extra == 'local-embeddings'
|
|
62
|
+
Provides-Extra: openai
|
|
63
|
+
Requires-Dist: openai>=1.0.0; extra == 'openai'
|
|
64
|
+
Provides-Extra: postgres
|
|
65
|
+
Requires-Dist: django>=4.0.0; extra == 'postgres'
|
|
66
|
+
Requires-Dist: pgvector; extra == 'postgres'
|
|
67
|
+
Requires-Dist: psycopg2-binary>=2.9.9; extra == 'postgres'
|
|
68
|
+
Description-Content-Type: text/markdown
|
|
69
|
+
|
|
70
|
+
# Rakam Systems Vectorstore
|
|
71
|
+
|
|
72
|
+
The vectorstore package of Rakam Systems providing vector database solutions and document processing capabilities.
|
|
73
|
+
|
|
74
|
+
## Overview
|
|
75
|
+
|
|
76
|
+
`rakam-systems-vectorstore` provides comprehensive vector storage, embedding models, and document loading capabilities. This package depends on `rakam-systems-core`.
|
|
77
|
+
|
|
78
|
+
## Features
|
|
79
|
+
|
|
80
|
+
- **Configuration-First Design**: Change your entire vector store setup via YAML - no code changes
|
|
81
|
+
- **Multiple Backends**: PostgreSQL with pgvector and FAISS in-memory storage
|
|
82
|
+
- **Flexible Embeddings**: Support for SentenceTransformers, OpenAI, and Cohere
|
|
83
|
+
- **Document Loaders**: PDF, DOCX, HTML, Markdown, CSV, and more
|
|
84
|
+
- **Search Capabilities**: Vector search, keyword search (BM25), and hybrid search
|
|
85
|
+
- **Chunking**: Intelligent text chunking with context preservation
|
|
86
|
+
- **Configuration**: Comprehensive YAML/JSON configuration support
|
|
87
|
+
|
|
88
|
+
### 🎯 Configuration Convenience
|
|
89
|
+
|
|
90
|
+
The vectorstore package's configurable design allows you to:
|
|
91
|
+
|
|
92
|
+
- **Switch embedding models** without code changes (local ↔ OpenAI ↔ Cohere)
|
|
93
|
+
- **Change search algorithms** instantly (BM25 ↔ ts_rank ↔ hybrid)
|
|
94
|
+
- **Adjust search parameters** (similarity metrics, top-k, hybrid weights)
|
|
95
|
+
- **Toggle features** (hybrid search, caching, reranking)
|
|
96
|
+
- **Tune performance** (batch sizes, chunk sizes, connection pools)
|
|
97
|
+
- **Swap backends** (FAISS ↔ PostgreSQL) by updating config
|
|
98
|
+
|
|
99
|
+
**Example**: Test different embedding models to find the best accuracy/cost balance - just update your YAML config file, no code changes needed!
|
|
100
|
+
|
|
101
|
+
## Installation
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
# Requires core package
|
|
105
|
+
pip install -e ./rakam-systems-core
|
|
106
|
+
|
|
107
|
+
# Install vectorstore package
|
|
108
|
+
pip install -e ./rakam-systems-vectorstore
|
|
109
|
+
|
|
110
|
+
# With specific backends
|
|
111
|
+
pip install -e "./rakam-systems-vectorstore[postgres]"
|
|
112
|
+
pip install -e "./rakam-systems-vectorstore[faiss]"
|
|
113
|
+
pip install -e "./rakam-systems-vectorstore[all]"
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## Quick Start
|
|
117
|
+
|
|
118
|
+
### FAISS Vector Store (In-Memory)
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
from rakam_systems_vectorstore.components.vectorstore.faiss_vector_store import FaissStore
|
|
122
|
+
from rakam_systems_vectorstore.core import Node, NodeMetadata
|
|
123
|
+
|
|
124
|
+
# Create store
|
|
125
|
+
store = FaissStore(
|
|
126
|
+
name="my_store",
|
|
127
|
+
base_index_path="./indexes",
|
|
128
|
+
embedding_model="Snowflake/snowflake-arctic-embed-m",
|
|
129
|
+
initialising=True
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
# Create nodes
|
|
133
|
+
nodes = [
|
|
134
|
+
Node(
|
|
135
|
+
content="Python is great for AI",
|
|
136
|
+
metadata=NodeMetadata(source_file_uuid="doc1", position=0)
|
|
137
|
+
)
|
|
138
|
+
]
|
|
139
|
+
|
|
140
|
+
# Add and search
|
|
141
|
+
store.create_collection_from_nodes("my_collection", nodes)
|
|
142
|
+
results, _ = store.search("my_collection", "AI programming", number=5)
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### PostgreSQL Vector Store
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
import os
|
|
149
|
+
import django
|
|
150
|
+
from django.conf import settings
|
|
151
|
+
|
|
152
|
+
# Configure Django (required)
|
|
153
|
+
if not settings.configured:
|
|
154
|
+
settings.configure(
|
|
155
|
+
INSTALLED_APPS=[
|
|
156
|
+
'django.contrib.contenttypes',
|
|
157
|
+
'rakam_systems_vectorstore.components.vectorstore',
|
|
158
|
+
],
|
|
159
|
+
DATABASES={
|
|
160
|
+
'default': {
|
|
161
|
+
'ENGINE': 'django.db.backends.postgresql',
|
|
162
|
+
'NAME': os.getenv('POSTGRES_DB', 'vectorstore_db'),
|
|
163
|
+
'USER': os.getenv('POSTGRES_USER', 'postgres'),
|
|
164
|
+
'PASSWORD': os.getenv('POSTGRES_PASSWORD', 'postgres'),
|
|
165
|
+
'HOST': os.getenv('POSTGRES_HOST', 'localhost'),
|
|
166
|
+
'PORT': os.getenv('POSTGRES_PORT', '5432'),
|
|
167
|
+
}
|
|
168
|
+
},
|
|
169
|
+
DEFAULT_AUTO_FIELD='django.db.models.BigAutoField',
|
|
170
|
+
)
|
|
171
|
+
django.setup()
|
|
172
|
+
|
|
173
|
+
from rakam_systems_vectorstore import ConfigurablePgVectorStore, VectorStoreConfig
|
|
174
|
+
|
|
175
|
+
# Create configuration
|
|
176
|
+
config = VectorStoreConfig(
|
|
177
|
+
embedding={
|
|
178
|
+
"model_type": "sentence_transformer",
|
|
179
|
+
"model_name": "Snowflake/snowflake-arctic-embed-m"
|
|
180
|
+
},
|
|
181
|
+
search={
|
|
182
|
+
"similarity_metric": "cosine",
|
|
183
|
+
"enable_hybrid_search": True
|
|
184
|
+
}
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
# Create and use store
|
|
188
|
+
store = ConfigurablePgVectorStore(config=config)
|
|
189
|
+
store.setup()
|
|
190
|
+
store.add_nodes(nodes)  # `nodes` is a list of Node objects, built as in the FAISS example above
|
|
191
|
+
results = store.search("What is AI?", top_k=5)
|
|
192
|
+
store.shutdown()
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
## Core Components
|
|
196
|
+
|
|
197
|
+
### Vector Stores
|
|
198
|
+
|
|
199
|
+
- **ConfigurablePgVectorStore**: PostgreSQL with pgvector, supports hybrid search and keyword search
|
|
200
|
+
- **FaissStore**: In-memory FAISS-based vector search
|
|
201
|
+
|
|
202
|
+
### Embeddings
|
|
203
|
+
|
|
204
|
+
- **ConfigurableEmbeddings**: Supports multiple backends
|
|
205
|
+
- SentenceTransformers (local)
|
|
206
|
+
- OpenAI embeddings
|
|
207
|
+
- Cohere embeddings
|
|
208
|
+
|
|
209
|
+
### Document Loaders
|
|
210
|
+
|
|
211
|
+
- **AdaptiveLoader**: Automatically detects and loads various file types
|
|
212
|
+
- **PdfLoader**: Advanced PDF processing with Docling
|
|
213
|
+
- **PdfLoaderLight**: Lightweight PDF to markdown conversion
|
|
214
|
+
- **DocLoader**: Microsoft Word documents
|
|
215
|
+
- **OdtLoader**: OpenDocument Text files
|
|
216
|
+
- **MdLoader**: Markdown files
|
|
217
|
+
- **HtmlLoader**: HTML files
|
|
218
|
+
- **EmlLoader**: Email files
|
|
219
|
+
- **TabularLoader**: CSV, Excel files
|
|
220
|
+
- **CodeLoader**: Source code files
|
|
221
|
+
|
|
222
|
+
### Chunking
|
|
223
|
+
|
|
224
|
+
- **TextChunker**: Sentence-based chunking with Chonkie
|
|
225
|
+
- **AdvancedChunker**: Context-aware chunking with heading preservation
|
|
226
|
+
|
|
227
|
+
## Package Structure
|
|
228
|
+
|
|
229
|
+
```
|
|
230
|
+
rakam-systems-vectorstore/
|
|
231
|
+
├── src/rakam_systems_vectorstore/
|
|
232
|
+
│ ├── core.py # Node, VSFile, NodeMetadata
|
|
233
|
+
│ ├── config.py # VectorStoreConfig
|
|
234
|
+
│ ├── components/
|
|
235
|
+
│ │ ├── vectorstore/ # Store implementations
|
|
236
|
+
│ │ │ ├── configurable_pg_vector_store.py
|
|
237
|
+
│ │ │ └── faiss_vector_store.py
|
|
238
|
+
│ │ ├── embedding_model/ # Embedding models
|
|
239
|
+
│ │ │ └── configurable_embeddings.py
|
|
240
|
+
│ │ ├── loader/ # Document loaders
|
|
241
|
+
│ │ │ ├── adaptive_loader.py
|
|
242
|
+
│ │ │ ├── pdf_loader.py
|
|
243
|
+
│ │ │ ├── pdf_loader_light.py
|
|
244
|
+
│ │ │ └── ... (other loaders)
|
|
245
|
+
│ │ └── chunker/ # Text chunkers
|
|
246
|
+
│ │ ├── text_chunker.py
|
|
247
|
+
│ │ └── advanced_chunker.py
|
|
248
|
+
│ ├── docs/ # Package documentation
|
|
249
|
+
│ └── server/ # MCP server
|
|
250
|
+
└── pyproject.toml
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
## Search Capabilities
|
|
254
|
+
|
|
255
|
+
### Vector Search
|
|
256
|
+
|
|
257
|
+
Semantic similarity search using embeddings:
|
|
258
|
+
|
|
259
|
+
```python
|
|
260
|
+
results = store.search("machine learning algorithms", top_k=10)
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
### Keyword Search (BM25)
|
|
264
|
+
|
|
265
|
+
Full-text search with BM25 ranking:
|
|
266
|
+
|
|
267
|
+
```python
|
|
268
|
+
results = store.keyword_search(
|
|
269
|
+
query="machine learning",
|
|
270
|
+
top_k=10,
|
|
271
|
+
ranking_algorithm="bm25"
|
|
272
|
+
)
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
### Hybrid Search
|
|
276
|
+
|
|
277
|
+
Combines vector and keyword search:
|
|
278
|
+
|
|
279
|
+
```python
|
|
280
|
+
results = store.hybrid_search(
|
|
281
|
+
query="neural networks",
|
|
282
|
+
top_k=10,
|
|
283
|
+
alpha=0.7 # 70% vector, 30% keyword
|
|
284
|
+
)
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
## Configuration
|
|
288
|
+
|
|
289
|
+
### From YAML
|
|
290
|
+
|
|
291
|
+
```yaml
|
|
292
|
+
# vectorstore_config.yaml
|
|
293
|
+
name: my_vectorstore
|
|
294
|
+
|
|
295
|
+
embedding:
|
|
296
|
+
model_type: sentence_transformer
|
|
297
|
+
model_name: Snowflake/snowflake-arctic-embed-m
|
|
298
|
+
batch_size: 128
|
|
299
|
+
normalize: true
|
|
300
|
+
|
|
301
|
+
database:
|
|
302
|
+
host: localhost
|
|
303
|
+
port: 5432
|
|
304
|
+
database: vectorstore_db
|
|
305
|
+
user: postgres
|
|
306
|
+
password: postgres
|
|
307
|
+
|
|
308
|
+
search:
|
|
309
|
+
similarity_metric: cosine
|
|
310
|
+
default_top_k: 5
|
|
311
|
+
enable_hybrid_search: true
|
|
312
|
+
hybrid_alpha: 0.7
|
|
313
|
+
|
|
314
|
+
index:
|
|
315
|
+
chunk_size: 512
|
|
316
|
+
chunk_overlap: 50
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
```python
|
|
320
|
+
config = VectorStoreConfig.from_yaml("vectorstore_config.yaml")
|
|
321
|
+
store = ConfigurablePgVectorStore(config=config)
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
## Documentation
|
|
325
|
+
|
|
326
|
+
Detailed documentation is available in the `src/rakam_systems_vectorstore/docs/` directory:
|
|
327
|
+
|
|
328
|
+
- [Installation Guide](src/rakam_systems_vectorstore/docs/INSTALLATION.md)
|
|
329
|
+
- [Quick Install](src/rakam_systems_vectorstore/docs/QUICK_INSTALL.md)
|
|
330
|
+
- [Architecture](src/rakam_systems_vectorstore/docs/ARCHITECTURE.md)
|
|
331
|
+
- [Package Structure](src/rakam_systems_vectorstore/docs/PACKAGE_STRUCTURE.md)
|
|
332
|
+
|
|
333
|
+
Loader-specific documentation:
|
|
334
|
+
|
|
335
|
+
- [PDF Loader](src/rakam_systems_vectorstore/components/loader/docs/PDF_LOADER_ARCHITECTURE.md)
|
|
336
|
+
- [DOC Loader](src/rakam_systems_vectorstore/components/loader/docs/DOC_LOADER_README.md)
|
|
337
|
+
- [Tabular Loader](src/rakam_systems_vectorstore/components/loader/docs/TABULAR_LOADER_README.md)
|
|
338
|
+
- [EML Loader](src/rakam_systems_vectorstore/components/loader/docs/EML_LOADER_README.md)
|
|
339
|
+
|
|
340
|
+
## Examples
|
|
341
|
+
|
|
342
|
+
See the `examples/ai_vectorstore_examples/` directory in the main repository for complete examples:
|
|
343
|
+
|
|
344
|
+
- Basic FAISS example
|
|
345
|
+
- PostgreSQL example
|
|
346
|
+
- Configurable vectorstore examples
|
|
347
|
+
- PDF loader examples
|
|
348
|
+
- Keyword search examples
|
|
349
|
+
|
|
350
|
+
## Environment Variables
|
|
351
|
+
|
|
352
|
+
- `POSTGRES_HOST`: PostgreSQL host (default: localhost)
|
|
353
|
+
- `POSTGRES_PORT`: PostgreSQL port (default: 5432)
|
|
354
|
+
- `POSTGRES_DB`: Database name (default: vectorstore_db)
|
|
355
|
+
- `POSTGRES_USER`: Database user (default: postgres)
|
|
356
|
+
- `POSTGRES_PASSWORD`: Database password
|
|
357
|
+
- `OPENAI_API_KEY`: For OpenAI embeddings
|
|
358
|
+
- `COHERE_API_KEY`: For Cohere embeddings
|
|
359
|
+
- `HUGGINGFACE_TOKEN`: For private HuggingFace models
|
|
360
|
+
|
|
361
|
+
## License
|
|
362
|
+
|
|
363
|
+
Apache 2.0
|
|
364
|
+
|
|
365
|
+
## Links
|
|
366
|
+
|
|
367
|
+
- [Main Repository](https://github.com/Rakam-AI/rakam-systems)
|
|
368
|
+
- [Documentation](../docs/)
|
|
369
|
+
- [Core Package](../rakam-systems-core/)
|
|
370
|
+
- [Agent Package](../rakam-systems-agent/)
|