langchain-hyperspace 3.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_hyperspace-3.0.1/PKG-INFO +322 -0
- langchain_hyperspace-3.0.1/README.md +283 -0
- langchain_hyperspace-3.0.1/pyproject.toml +79 -0
- langchain_hyperspace-3.0.1/setup.cfg +4 -0
- langchain_hyperspace-3.0.1/src/langchain_hyperspace/__init__.py +7 -0
- langchain_hyperspace-3.0.1/src/langchain_hyperspace/client.py +91 -0
- langchain_hyperspace-3.0.1/src/langchain_hyperspace/embeddings.py +77 -0
- langchain_hyperspace-3.0.1/src/langchain_hyperspace/generated/__init__.py +5 -0
- langchain_hyperspace-3.0.1/src/langchain_hyperspace/generated/hyperspace_pb2.py +243 -0
- langchain_hyperspace-3.0.1/src/langchain_hyperspace/generated/hyperspace_pb2.pyi +785 -0
- langchain_hyperspace-3.0.1/src/langchain_hyperspace/generated/hyperspace_pb2_grpc.py +1360 -0
- langchain_hyperspace-3.0.1/src/langchain_hyperspace/vectorstores.py +212 -0
- langchain_hyperspace-3.0.1/src/langchain_hyperspace.egg-info/PKG-INFO +322 -0
- langchain_hyperspace-3.0.1/src/langchain_hyperspace.egg-info/SOURCES.txt +16 -0
- langchain_hyperspace-3.0.1/src/langchain_hyperspace.egg-info/dependency_links.txt +1 -0
- langchain_hyperspace-3.0.1/src/langchain_hyperspace.egg-info/requires.txt +16 -0
- langchain_hyperspace-3.0.1/src/langchain_hyperspace.egg-info/top_level.txt +1 -0
- langchain_hyperspace-3.0.1/tests/test_vectorstore.py +232 -0
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: langchain-hyperspace
|
|
3
|
+
Version: 3.0.1
|
|
4
|
+
Summary: LangChain integration for HyperspaceDB - Hyperbolic Vector Database with Edge-Cloud Federation
|
|
5
|
+
Author-email: YARlabs <hi@yar.ink>
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/yourusername/hyperspace-db
|
|
8
|
+
Project-URL: Documentation, https://hyperspacedb.io/docs
|
|
9
|
+
Project-URL: Repository, https://github.com/yourusername/hyperspace-db
|
|
10
|
+
Project-URL: Bug Tracker, https://github.com/yourusername/hyperspace-db/issues
|
|
11
|
+
Keywords: langchain,vector database,embeddings,AI,RAG,hyperbolic,edge computing
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Requires-Python: >=3.8
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
Requires-Dist: langchain-core>=0.1.0
|
|
26
|
+
Requires-Dist: grpcio>=1.60.0
|
|
27
|
+
Requires-Dist: protobuf>=4.25.0
|
|
28
|
+
Requires-Dist: numpy>=1.24.0
|
|
29
|
+
Provides-Extra: yarlabs
|
|
30
|
+
Requires-Dist: torch>=2.0.0; extra == "yarlabs"
|
|
31
|
+
Requires-Dist: transformers>=4.30.0; extra == "yarlabs"
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Requires-Dist: pytest>=7.4.0; extra == "dev"
|
|
34
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
35
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
36
|
+
Requires-Dist: mypy>=1.6.0; extra == "dev"
|
|
37
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
38
|
+
Requires-Dist: grpcio-tools>=1.60.0; extra == "dev"
|
|
39
|
+
|
|
40
|
+
# LangChain HyperspaceDB Integration
|
|
41
|
+
|
|
42
|
+
[![PyPI version](https://badge.fury.io/py/langchain-hyperspace.svg)](https://badge.fury.io/py/langchain-hyperspace)
|
|
43
|
+
[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
|
|
44
|
+
|
|
45
|
+
Official LangChain integration for [HyperspaceDB](https://github.com/yourusername/hyperspace-db) - a hyperbolic vector database with Edge-Cloud Federation.
|
|
46
|
+
|
|
47
|
+
## Features
|
|
48
|
+
|
|
49
|
+
- 🌐 **Hyperbolic Geometry**: Poincaré ball model for hierarchical embeddings
|
|
50
|
+
- 🔄 **Edge-Cloud Federation**: Offline-first with automatic sync
|
|
51
|
+
- 🌳 **Merkle Tree Sync**: Efficient data replication and verification
|
|
52
|
+
- 🗜️ **1-bit Quantization**: 64x memory reduction with minimal accuracy loss
|
|
53
|
+
- 🔍 **Built-in Deduplication**: Content-based hashing prevents duplicates
|
|
54
|
+
- ⚡ **High Performance**: Written in Rust for maximum speed
|
|
55
|
+
|
|
56
|
+
## Installation
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
pip install langchain-hyperspace
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Quick Start
|
|
63
|
+
|
|
64
|
+
### Basic Usage
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from langchain_hyperspace import HyperspaceVectorStore
|
|
68
|
+
from langchain_openai import OpenAIEmbeddings
|
|
69
|
+
from langchain.text_splitter import CharacterTextSplitter
|
|
70
|
+
from langchain.document_loaders import TextLoader
|
|
71
|
+
|
|
72
|
+
# Initialize embeddings
|
|
73
|
+
embeddings = OpenAIEmbeddings()
|
|
74
|
+
|
|
75
|
+
# Create vector store
|
|
76
|
+
vectorstore = HyperspaceVectorStore(
|
|
77
|
+
host="localhost",
|
|
78
|
+
port=50051,
|
|
79
|
+
collection_name="my_documents",
|
|
80
|
+
embedding_function=embeddings,
|
|
81
|
+
api_key="your_api_key" # Optional
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
# Load and split documents
|
|
85
|
+
loader = TextLoader("path/to/document.txt")
|
|
86
|
+
documents = loader.load()
|
|
87
|
+
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
|
88
|
+
docs = text_splitter.split_documents(documents)
|
|
89
|
+
|
|
90
|
+
# Add documents to vector store
|
|
91
|
+
vectorstore.add_documents(docs)
|
|
92
|
+
|
|
93
|
+
# Search for similar documents
|
|
94
|
+
query = "What is the main topic?"
|
|
95
|
+
results = vectorstore.similarity_search(query, k=4)
|
|
96
|
+
|
|
97
|
+
for doc in results:
|
|
98
|
+
print(doc.page_content)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### RAG (Retrieval-Augmented Generation) Example
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
from langchain_hyperspace import HyperspaceVectorStore
|
|
105
|
+
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
|
|
106
|
+
from langchain.chains import RetrievalQA
|
|
107
|
+
|
|
108
|
+
# Setup
|
|
109
|
+
embeddings = OpenAIEmbeddings()
|
|
110
|
+
vectorstore = HyperspaceVectorStore(
|
|
111
|
+
host="localhost",
|
|
112
|
+
port=50051,
|
|
113
|
+
collection_name="knowledge_base",
|
|
114
|
+
embedding_function=embeddings
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
# Create RAG chain
|
|
118
|
+
llm = ChatOpenAI(model_name="gpt-4")
|
|
119
|
+
qa_chain = RetrievalQA.from_chain_type(
|
|
120
|
+
llm=llm,
|
|
121
|
+
chain_type="stuff",
|
|
122
|
+
retriever=vectorstore.as_retriever(search_kwargs={"k": 3})
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
# Ask questions
|
|
126
|
+
response = qa_chain.run("What are the key features of HyperspaceDB?")
|
|
127
|
+
print(response)
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### Content Deduplication
|
|
131
|
+
|
|
132
|
+
HyperspaceDB automatically deduplicates content using SHA-256 hashing:
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
vectorstore = HyperspaceVectorStore(
|
|
136
|
+
host="localhost",
|
|
137
|
+
port=50051,
|
|
138
|
+
collection_name="deduplicated_docs",
|
|
139
|
+
embedding_function=embeddings,
|
|
140
|
+
enable_deduplication=True # Default
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
# Adding the same text twice will only store it once
|
|
144
|
+
vectorstore.add_texts([
|
|
145
|
+
"This is a unique document",
|
|
146
|
+
"This is a unique document", # Duplicate - will be skipped
|
|
147
|
+
"This is another document"
|
|
148
|
+
])
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
### Sync Verification (Edge-Cloud Federation)
|
|
152
|
+
|
|
153
|
+
Check synchronization status using the Merkle tree digest:
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
# Get collection digest
|
|
157
|
+
digest = vectorstore.get_digest()
|
|
158
|
+
|
|
159
|
+
print(f"Logical Clock: {digest['logical_clock']}")
|
|
160
|
+
print(f"State Hash: {digest['state_hash']}")
|
|
161
|
+
print(f"Vector Count: {digest['count']}")
|
|
162
|
+
print(f"Bucket Hashes: {len(digest['buckets'])} buckets")
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
## Configuration
|
|
166
|
+
|
|
167
|
+
### Connection Options
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
vectorstore = HyperspaceVectorStore(
|
|
171
|
+
host="localhost", # Server host
|
|
172
|
+
port=50051, # gRPC port
|
|
173
|
+
collection_name="default", # Collection name
|
|
174
|
+
embedding_function=embeddings,
|
|
175
|
+
api_key=None, # Optional API key
|
|
176
|
+
dimension=1536, # Vector dimension (must match embeddings)
|
|
177
|
+
metric="l2", # Distance metric: 'l2', 'cosine', 'dot'
|
|
178
|
+
enable_deduplication=True # Enable content-based deduplication
|
|
179
|
+
)
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
### Distance Metrics
|
|
183
|
+
|
|
184
|
+
- `l2`: Euclidean distance (default)
|
|
185
|
+
- `cosine`: Cosine similarity
|
|
186
|
+
- `dot`: Dot product
|
|
187
|
+
|
|
188
|
+
## Advanced Usage
|
|
189
|
+
|
|
190
|
+
### Metadata Filtering
|
|
191
|
+
|
|
192
|
+
```python
|
|
193
|
+
# Add documents with metadata
|
|
194
|
+
vectorstore.add_texts(
|
|
195
|
+
texts=["Document 1", "Document 2"],
|
|
196
|
+
metadatas=[
|
|
197
|
+
{"source": "web", "category": "tech"},
|
|
198
|
+
{"source": "pdf", "category": "science"}
|
|
199
|
+
]
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
# Search with metadata filter (coming soon)
|
|
203
|
+
results = vectorstore.similarity_search(
|
|
204
|
+
"technology trends",
|
|
205
|
+
k=5,
|
|
206
|
+
filter={"category": "tech"}
|
|
207
|
+
)
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### Batch Operations
|
|
211
|
+
|
|
212
|
+
```python
|
|
213
|
+
# Add large batches efficiently
|
|
214
|
+
texts = [f"Document {i}" for i in range(10000)]
|
|
215
|
+
metadatas = [{"index": i} for i in range(10000)]
|
|
216
|
+
|
|
217
|
+
vectorstore.add_texts(texts, metadatas=metadatas)
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
## Running HyperspaceDB Server
|
|
221
|
+
|
|
222
|
+
### Using Docker
|
|
223
|
+
|
|
224
|
+
```bash
|
|
225
|
+
docker run -p 50051:50051 -p 50050:50050 \
|
|
226
|
+
-e HYPERSPACE_API_KEY=your_secret_key \
|
|
227
|
+
hyperspacedb/hyperspace-server:latest
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
### From Source
|
|
231
|
+
|
|
232
|
+
```bash
|
|
233
|
+
git clone https://github.com/yourusername/hyperspace-db
|
|
234
|
+
cd hyperspace-db
|
|
235
|
+
cargo build --release
|
|
236
|
+
HYPERSPACE_API_KEY=your_secret_key ./target/release/hyperspace-server
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
## Development
|
|
240
|
+
|
|
241
|
+
### Setup
|
|
242
|
+
|
|
243
|
+
```bash
|
|
244
|
+
git clone https://github.com/yourusername/hyperspace-db
|
|
245
|
+
cd hyperspace-db/integrations/langchain-python
|
|
246
|
+
|
|
247
|
+
# Install in development mode
|
|
248
|
+
pip install -e ".[dev]"
|
|
249
|
+
|
|
250
|
+
# Generate protobuf files
|
|
251
|
+
./generate_proto.sh
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
### Running Tests
|
|
255
|
+
|
|
256
|
+
```bash
|
|
257
|
+
pytest tests/
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
### Code Quality
|
|
261
|
+
|
|
262
|
+
```bash
|
|
263
|
+
# Format code
|
|
264
|
+
black src/ tests/
|
|
265
|
+
|
|
266
|
+
# Lint
|
|
267
|
+
ruff check src/ tests/
|
|
268
|
+
|
|
269
|
+
# Type check
|
|
270
|
+
mypy src/
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
## Examples
|
|
274
|
+
|
|
275
|
+
See the [examples/](examples/) directory for complete examples:
|
|
276
|
+
|
|
277
|
+
- `rag_chatbot.py`: RAG chatbot with memory
|
|
278
|
+
- `document_qa.py`: Document Q&A system
|
|
279
|
+
- `semantic_search.py`: Semantic search engine
|
|
280
|
+
- `edge_sync.py`: Edge-Cloud synchronization demo
|
|
281
|
+
|
|
282
|
+
## Documentation
|
|
283
|
+
|
|
284
|
+
- [HyperspaceDB Documentation](https://hyperspacedb.io/docs)
|
|
285
|
+
- [LangChain Documentation](https://python.langchain.com/docs)
|
|
286
|
+
- [API Reference](https://hyperspacedb.io/docs/api)
|
|
287
|
+
|
|
288
|
+
## Performance
|
|
289
|
+
|
|
290
|
+
HyperspaceDB is optimized for:
|
|
291
|
+
|
|
292
|
+
- **Insert**: 10K+ vectors/second
|
|
293
|
+
- **Search**: <10ms p99 latency
|
|
294
|
+
- **Memory**: 64x reduction with 1-bit quantization
|
|
295
|
+
- **Sync**: Merkle Tree-based differential sync
|
|
296
|
+
|
|
297
|
+
## Contributing
|
|
298
|
+
|
|
299
|
+
Contributions are welcome! Please see [CONTRIBUTING.md](../../CONTRIBUTING.md) for guidelines.
|
|
300
|
+
|
|
301
|
+
## License
|
|
302
|
+
|
|
303
|
+
Apache License 2.0 - see [LICENSE](../../LICENSE) for details.
|
|
304
|
+
|
|
305
|
+
## Support
|
|
306
|
+
|
|
307
|
+
- GitHub Issues: [Report bugs](https://github.com/yourusername/hyperspace-db/issues)
|
|
308
|
+
- Discord: [Join community](https://discord.gg/hyperspacedb)
|
|
309
|
+
- Email: support@hyperspacedb.io
|
|
310
|
+
|
|
311
|
+
## Citation
|
|
312
|
+
|
|
313
|
+
If you use HyperspaceDB in your research, please cite:
|
|
314
|
+
|
|
315
|
+
```bibtex
|
|
316
|
+
@software{hyperspacedb2024,
|
|
317
|
+
title = {HyperspaceDB: Hyperbolic Vector Database with Edge-Cloud Federation},
|
|
318
|
+
author = {HyperspaceDB Team},
|
|
319
|
+
year = {2024},
|
|
320
|
+
url = {https://github.com/yourusername/hyperspace-db}
|
|
321
|
+
}
|
|
322
|
+
```
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
# LangChain HyperspaceDB Integration
|
|
2
|
+
|
|
3
|
+
[![PyPI version](https://badge.fury.io/py/langchain-hyperspace.svg)](https://badge.fury.io/py/langchain-hyperspace)
|
|
4
|
+
[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
|
|
5
|
+
|
|
6
|
+
Official LangChain integration for [HyperspaceDB](https://github.com/yourusername/hyperspace-db) - a hyperbolic vector database with Edge-Cloud Federation.
|
|
7
|
+
|
|
8
|
+
## Features
|
|
9
|
+
|
|
10
|
+
- 🌐 **Hyperbolic Geometry**: Poincaré ball model for hierarchical embeddings
|
|
11
|
+
- 🔄 **Edge-Cloud Federation**: Offline-first with automatic sync
|
|
12
|
+
- 🌳 **Merkle Tree Sync**: Efficient data replication and verification
|
|
13
|
+
- 🗜️ **1-bit Quantization**: 64x memory reduction with minimal accuracy loss
|
|
14
|
+
- 🔍 **Built-in Deduplication**: Content-based hashing prevents duplicates
|
|
15
|
+
- ⚡ **High Performance**: Written in Rust for maximum speed
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install langchain-hyperspace
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Quick Start
|
|
24
|
+
|
|
25
|
+
### Basic Usage
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
from langchain_hyperspace import HyperspaceVectorStore
|
|
29
|
+
from langchain_openai import OpenAIEmbeddings
|
|
30
|
+
from langchain.text_splitter import CharacterTextSplitter
|
|
31
|
+
from langchain.document_loaders import TextLoader
|
|
32
|
+
|
|
33
|
+
# Initialize embeddings
|
|
34
|
+
embeddings = OpenAIEmbeddings()
|
|
35
|
+
|
|
36
|
+
# Create vector store
|
|
37
|
+
vectorstore = HyperspaceVectorStore(
|
|
38
|
+
host="localhost",
|
|
39
|
+
port=50051,
|
|
40
|
+
collection_name="my_documents",
|
|
41
|
+
embedding_function=embeddings,
|
|
42
|
+
api_key="your_api_key" # Optional
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
# Load and split documents
|
|
46
|
+
loader = TextLoader("path/to/document.txt")
|
|
47
|
+
documents = loader.load()
|
|
48
|
+
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
|
49
|
+
docs = text_splitter.split_documents(documents)
|
|
50
|
+
|
|
51
|
+
# Add documents to vector store
|
|
52
|
+
vectorstore.add_documents(docs)
|
|
53
|
+
|
|
54
|
+
# Search for similar documents
|
|
55
|
+
query = "What is the main topic?"
|
|
56
|
+
results = vectorstore.similarity_search(query, k=4)
|
|
57
|
+
|
|
58
|
+
for doc in results:
|
|
59
|
+
print(doc.page_content)
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### RAG (Retrieval-Augmented Generation) Example
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
from langchain_hyperspace import HyperspaceVectorStore
|
|
66
|
+
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
|
|
67
|
+
from langchain.chains import RetrievalQA
|
|
68
|
+
|
|
69
|
+
# Setup
|
|
70
|
+
embeddings = OpenAIEmbeddings()
|
|
71
|
+
vectorstore = HyperspaceVectorStore(
|
|
72
|
+
host="localhost",
|
|
73
|
+
port=50051,
|
|
74
|
+
collection_name="knowledge_base",
|
|
75
|
+
embedding_function=embeddings
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
# Create RAG chain
|
|
79
|
+
llm = ChatOpenAI(model_name="gpt-4")
|
|
80
|
+
qa_chain = RetrievalQA.from_chain_type(
|
|
81
|
+
llm=llm,
|
|
82
|
+
chain_type="stuff",
|
|
83
|
+
retriever=vectorstore.as_retriever(search_kwargs={"k": 3})
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
# Ask questions
|
|
87
|
+
response = qa_chain.run("What are the key features of HyperspaceDB?")
|
|
88
|
+
print(response)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Content Deduplication
|
|
92
|
+
|
|
93
|
+
HyperspaceDB automatically deduplicates content using SHA-256 hashing:
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
vectorstore = HyperspaceVectorStore(
|
|
97
|
+
host="localhost",
|
|
98
|
+
port=50051,
|
|
99
|
+
collection_name="deduplicated_docs",
|
|
100
|
+
embedding_function=embeddings,
|
|
101
|
+
enable_deduplication=True # Default
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
# Adding the same text twice will only store it once
|
|
105
|
+
vectorstore.add_texts([
|
|
106
|
+
"This is a unique document",
|
|
107
|
+
"This is a unique document", # Duplicate - will be skipped
|
|
108
|
+
"This is another document"
|
|
109
|
+
])
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Sync Verification (Edge-Cloud Federation)
|
|
113
|
+
|
|
114
|
+
Check synchronization status using the Merkle tree digest:
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
# Get collection digest
|
|
118
|
+
digest = vectorstore.get_digest()
|
|
119
|
+
|
|
120
|
+
print(f"Logical Clock: {digest['logical_clock']}")
|
|
121
|
+
print(f"State Hash: {digest['state_hash']}")
|
|
122
|
+
print(f"Vector Count: {digest['count']}")
|
|
123
|
+
print(f"Bucket Hashes: {len(digest['buckets'])} buckets")
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Configuration
|
|
127
|
+
|
|
128
|
+
### Connection Options
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
vectorstore = HyperspaceVectorStore(
|
|
132
|
+
host="localhost", # Server host
|
|
133
|
+
port=50051, # gRPC port
|
|
134
|
+
collection_name="default", # Collection name
|
|
135
|
+
embedding_function=embeddings,
|
|
136
|
+
api_key=None, # Optional API key
|
|
137
|
+
dimension=1536, # Vector dimension (must match embeddings)
|
|
138
|
+
metric="l2", # Distance metric: 'l2', 'cosine', 'dot'
|
|
139
|
+
enable_deduplication=True # Enable content-based deduplication
|
|
140
|
+
)
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
### Distance Metrics
|
|
144
|
+
|
|
145
|
+
- `l2`: Euclidean distance (default)
|
|
146
|
+
- `cosine`: Cosine similarity
|
|
147
|
+
- `dot`: Dot product
|
|
148
|
+
|
|
149
|
+
## Advanced Usage
|
|
150
|
+
|
|
151
|
+
### Metadata Filtering
|
|
152
|
+
|
|
153
|
+
```python
|
|
154
|
+
# Add documents with metadata
|
|
155
|
+
vectorstore.add_texts(
|
|
156
|
+
texts=["Document 1", "Document 2"],
|
|
157
|
+
metadatas=[
|
|
158
|
+
{"source": "web", "category": "tech"},
|
|
159
|
+
{"source": "pdf", "category": "science"}
|
|
160
|
+
]
|
|
161
|
+
)
|
|
162
|
+
|
|
163
|
+
# Search with metadata filter (coming soon)
|
|
164
|
+
results = vectorstore.similarity_search(
|
|
165
|
+
"technology trends",
|
|
166
|
+
k=5,
|
|
167
|
+
filter={"category": "tech"}
|
|
168
|
+
)
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
### Batch Operations
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
# Add large batches efficiently
|
|
175
|
+
texts = [f"Document {i}" for i in range(10000)]
|
|
176
|
+
metadatas = [{"index": i} for i in range(10000)]
|
|
177
|
+
|
|
178
|
+
vectorstore.add_texts(texts, metadatas=metadatas)
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## Running HyperspaceDB Server
|
|
182
|
+
|
|
183
|
+
### Using Docker
|
|
184
|
+
|
|
185
|
+
```bash
|
|
186
|
+
docker run -p 50051:50051 -p 50050:50050 \
|
|
187
|
+
-e HYPERSPACE_API_KEY=your_secret_key \
|
|
188
|
+
hyperspacedb/hyperspace-server:latest
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
### From Source
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
git clone https://github.com/yourusername/hyperspace-db
|
|
195
|
+
cd hyperspace-db
|
|
196
|
+
cargo build --release
|
|
197
|
+
HYPERSPACE_API_KEY=your_secret_key ./target/release/hyperspace-server
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
## Development
|
|
201
|
+
|
|
202
|
+
### Setup
|
|
203
|
+
|
|
204
|
+
```bash
|
|
205
|
+
git clone https://github.com/yourusername/hyperspace-db
|
|
206
|
+
cd hyperspace-db/integrations/langchain-python
|
|
207
|
+
|
|
208
|
+
# Install in development mode
|
|
209
|
+
pip install -e ".[dev]"
|
|
210
|
+
|
|
211
|
+
# Generate protobuf files
|
|
212
|
+
./generate_proto.sh
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### Running Tests
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
pytest tests/
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
### Code Quality
|
|
222
|
+
|
|
223
|
+
```bash
|
|
224
|
+
# Format code
|
|
225
|
+
black src/ tests/
|
|
226
|
+
|
|
227
|
+
# Lint
|
|
228
|
+
ruff check src/ tests/
|
|
229
|
+
|
|
230
|
+
# Type check
|
|
231
|
+
mypy src/
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
## Examples
|
|
235
|
+
|
|
236
|
+
See the [examples/](examples/) directory for complete examples:
|
|
237
|
+
|
|
238
|
+
- `rag_chatbot.py`: RAG chatbot with memory
|
|
239
|
+
- `document_qa.py`: Document Q&A system
|
|
240
|
+
- `semantic_search.py`: Semantic search engine
|
|
241
|
+
- `edge_sync.py`: Edge-Cloud synchronization demo
|
|
242
|
+
|
|
243
|
+
## Documentation
|
|
244
|
+
|
|
245
|
+
- [HyperspaceDB Documentation](https://hyperspacedb.io/docs)
|
|
246
|
+
- [LangChain Documentation](https://python.langchain.com/docs)
|
|
247
|
+
- [API Reference](https://hyperspacedb.io/docs/api)
|
|
248
|
+
|
|
249
|
+
## Performance
|
|
250
|
+
|
|
251
|
+
HyperspaceDB is optimized for:
|
|
252
|
+
|
|
253
|
+
- **Insert**: 10K+ vectors/second
|
|
254
|
+
- **Search**: <10ms p99 latency
|
|
255
|
+
- **Memory**: 64x reduction with 1-bit quantization
|
|
256
|
+
- **Sync**: Merkle Tree-based differential sync
|
|
257
|
+
|
|
258
|
+
## Contributing
|
|
259
|
+
|
|
260
|
+
Contributions are welcome! Please see [CONTRIBUTING.md](../../CONTRIBUTING.md) for guidelines.
|
|
261
|
+
|
|
262
|
+
## License
|
|
263
|
+
|
|
264
|
+
Apache License 2.0 - see [LICENSE](../../LICENSE) for details.
|
|
265
|
+
|
|
266
|
+
## Support
|
|
267
|
+
|
|
268
|
+
- GitHub Issues: [Report bugs](https://github.com/yourusername/hyperspace-db/issues)
|
|
269
|
+
- Discord: [Join community](https://discord.gg/hyperspacedb)
|
|
270
|
+
- Email: support@hyperspacedb.io
|
|
271
|
+
|
|
272
|
+
## Citation
|
|
273
|
+
|
|
274
|
+
If you use HyperspaceDB in your research, please cite:
|
|
275
|
+
|
|
276
|
+
```bibtex
|
|
277
|
+
@software{hyperspacedb2024,
|
|
278
|
+
title = {HyperspaceDB: Hyperbolic Vector Database with Edge-Cloud Federation},
|
|
279
|
+
author = {HyperspaceDB Team},
|
|
280
|
+
year = {2024},
|
|
281
|
+
url = {https://github.com/yourusername/hyperspace-db}
|
|
282
|
+
}
|
|
283
|
+
```
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "langchain-hyperspace"
|
|
7
|
+
version = "3.0.1"
|
|
8
|
+
description = "LangChain integration for HyperspaceDB - Hyperbolic Vector Database with Edge-Cloud Federation"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
license = {text = "Apache-2.0"}
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "YARlabs", email = "hi@yar.ink"}
|
|
14
|
+
]
|
|
15
|
+
keywords = ["langchain", "vector database", "embeddings", "AI", "RAG", "hyperbolic", "edge computing"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 4 - Beta",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"License :: OSI Approved :: Apache Software License",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.8",
|
|
22
|
+
"Programming Language :: Python :: 3.9",
|
|
23
|
+
"Programming Language :: Python :: 3.10",
|
|
24
|
+
"Programming Language :: Python :: 3.11",
|
|
25
|
+
"Programming Language :: Python :: 3.12",
|
|
26
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
27
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
dependencies = [
|
|
31
|
+
"langchain-core>=0.1.0",
|
|
32
|
+
"grpcio>=1.60.0",
|
|
33
|
+
"protobuf>=4.25.0",
|
|
34
|
+
"numpy>=1.24.0",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
[project.optional-dependencies]
|
|
38
|
+
yarlabs = [
|
|
39
|
+
"torch>=2.0.0",
|
|
40
|
+
"transformers>=4.30.0",
|
|
41
|
+
]
|
|
42
|
+
dev = [
|
|
43
|
+
"pytest>=7.4.0",
|
|
44
|
+
"pytest-asyncio>=0.21.0",
|
|
45
|
+
"black>=23.0.0",
|
|
46
|
+
"mypy>=1.6.0",
|
|
47
|
+
"ruff>=0.1.0",
|
|
48
|
+
"grpcio-tools>=1.60.0",
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
[project.urls]
|
|
52
|
+
Homepage = "https://github.com/yourusername/hyperspace-db"
|
|
53
|
+
Documentation = "https://hyperspacedb.io/docs"
|
|
54
|
+
Repository = "https://github.com/yourusername/hyperspace-db"
|
|
55
|
+
"Bug Tracker" = "https://github.com/yourusername/hyperspace-db/issues"
|
|
56
|
+
|
|
57
|
+
[tool.setuptools.packages.find]
|
|
58
|
+
where = ["src"]
|
|
59
|
+
|
|
60
|
+
[tool.black]
|
|
61
|
+
line-length = 100
|
|
62
|
+
target-version = ['py38', 'py39', 'py310', 'py311']
|
|
63
|
+
|
|
64
|
+
[tool.ruff]
|
|
65
|
+
line-length = 100
|
|
66
|
+
target-version = "py38"
|
|
67
|
+
|
|
68
|
+
[tool.mypy]
|
|
69
|
+
python_version = "3.8"
|
|
70
|
+
warn_return_any = true
|
|
71
|
+
warn_unused_configs = true
|
|
72
|
+
disallow_untyped_defs = true
|
|
73
|
+
|
|
74
|
+
[tool.pytest.ini_options]
|
|
75
|
+
testpaths = ["tests"]
|
|
76
|
+
python_files = "test_*.py"
|
|
77
|
+
python_classes = "Test*"
|
|
78
|
+
python_functions = "test_*"
|
|
79
|
+
asyncio_mode = "auto"
|