simplevecdb 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- simplevecdb/__init__.py +20 -0
- simplevecdb/config.py +105 -0
- simplevecdb/core.py +918 -0
- simplevecdb/embeddings/__init__.py +0 -0
- simplevecdb/embeddings/models.py +104 -0
- simplevecdb/embeddings/server.py +276 -0
- simplevecdb/integrations/__init__.py +9 -0
- simplevecdb/integrations/langchain.py +255 -0
- simplevecdb/integrations/llamaindex.py +220 -0
- simplevecdb/types.py +28 -0
- simplevecdb/utils.py +19 -0
- simplevecdb-1.0.0.dist-info/METADATA +453 -0
- simplevecdb-1.0.0.dist-info/RECORD +16 -0
- simplevecdb-1.0.0.dist-info/WHEEL +4 -0
- simplevecdb-1.0.0.dist-info/entry_points.txt +2 -0
- simplevecdb-1.0.0.dist-info/licenses/LICENSE +21 -0
simplevecdb/__init__.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from .types import Document, DistanceStrategy
|
|
4
|
+
from .core import VectorDB, VectorCollection, Quantization, get_optimal_batch_size
|
|
5
|
+
from .config import config
|
|
6
|
+
from .integrations.langchain import SimpleVecDBVectorStore
|
|
7
|
+
from .integrations.llamaindex import SimpleVecDBLlamaStore
|
|
8
|
+
|
|
9
|
+
__version__ = "1.0.0"
|
|
10
|
+
__all__ = [
|
|
11
|
+
"VectorDB",
|
|
12
|
+
"VectorCollection",
|
|
13
|
+
"Quantization",
|
|
14
|
+
"Document",
|
|
15
|
+
"DistanceStrategy",
|
|
16
|
+
"SimpleVecDBVectorStore",
|
|
17
|
+
"SimpleVecDBLlamaStore",
|
|
18
|
+
"config",
|
|
19
|
+
"get_optimal_batch_size",
|
|
20
|
+
]
|
simplevecdb/config.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Environment configuration for SimpleVecDB."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from dotenv import load_dotenv
|
|
6
|
+
|
|
7
|
+
from .core import get_optimal_batch_size
|
|
8
|
+
|
|
9
|
+
# Look for a .env file two directory levels above this package directory and
# load it into the process environment before any settings are read.
# NOTE(review): that location is the project root only in a source checkout
# laid out that way — confirm it is the intended path for installed wheels.
env_path = Path(__file__).parent.parent.parent.joinpath(".env")
load_dotenv(dotenv_path=env_path)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _parse_registry(raw: str | None, default_model: str) -> dict[str, str]:
|
|
15
|
+
"""Convert comma-separated alias=repo entries into a registry dict."""
|
|
16
|
+
registry: dict[str, str] = {}
|
|
17
|
+
if raw:
|
|
18
|
+
for entry in raw.split(","):
|
|
19
|
+
entry = entry.strip()
|
|
20
|
+
if not entry:
|
|
21
|
+
continue
|
|
22
|
+
if "=" in entry:
|
|
23
|
+
alias, repo = entry.split("=", 1)
|
|
24
|
+
registry[alias.strip()] = repo.strip()
|
|
25
|
+
else:
|
|
26
|
+
registry[entry] = entry
|
|
27
|
+
registry.setdefault("default", default_model)
|
|
28
|
+
return registry
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _parse_api_keys(raw: str | None) -> set[str]:
|
|
32
|
+
"""Return a sanitized set of API keys from comma-separated env values."""
|
|
33
|
+
if not raw:
|
|
34
|
+
return set()
|
|
35
|
+
return {token.strip() for token in raw.split(",") if token.strip()}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _parse_bool_env(raw: str | None, default: bool) -> bool:
|
|
39
|
+
"""Handle common truthy/falsey env strings with a fallback default."""
|
|
40
|
+
if raw is None:
|
|
41
|
+
return default
|
|
42
|
+
return raw.strip().lower() not in {"0", "false", "no", "off"}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class Config:
    """
    Configuration settings for SimpleVecDB, loaded from environment variables.

    Every setting is a class attribute evaluated once, when this class body
    runs at import time; later changes to the environment are not picked up.
    Integer settings that are set to a blank string are treated as unset.

    Attributes:
        EMBEDDING_MODEL: The default embedding model repo id or alias.
        EMBEDDING_CACHE_DIR: Directory path for caching embedding models.
        EMBEDDING_MODEL_REGISTRY: Mapping of model aliases to repo ids.
        EMBEDDING_MODEL_REGISTRY_LOCKED: If True, only allow listed models.
        EMBEDDING_BATCH_SIZE: Optimal batch size for embedding requests.
        EMBEDDING_SERVER_MAX_REQUEST_ITEMS: Max items per embedding request.
        EMBEDDING_SERVER_API_KEYS: Set of valid API keys for the embedding server.
        DATABASE_PATH: Path to the SimpleVecDB database file.
        SERVER_HOST: Host address for the SimpleVecDB server.
        SERVER_PORT: Port number for the SimpleVecDB server.

    Raises:
        ValueError: At class-definition time, if an integer setting holds a
            non-blank value that ``int()`` cannot parse.
    """

    # Embedding Model
    EMBEDDING_MODEL: str = os.getenv("EMBEDDING_MODEL", "TaylorAI/bge-micro-v2")
    EMBEDDING_CACHE_DIR: str = os.getenv(
        "EMBEDDING_CACHE_DIR", str(Path.home() / ".cache" / "simplevecdb")
    )
    _registry_env = os.getenv("EMBEDDING_MODEL_REGISTRY")
    # The configured default model is registered under the "default" alias
    # unless the registry value defines that alias itself.
    EMBEDDING_MODEL_REGISTRY: dict[str, str] = _parse_registry(
        _registry_env, EMBEDDING_MODEL
    )
    EMBEDDING_MODEL_REGISTRY_LOCKED: bool = _parse_bool_env(
        os.getenv("EMBEDDING_MODEL_REGISTRY_LOCKED"), True
    )
    # Auto-detect optimal batch size if not explicitly set. A blank value
    # (``EMBEDDING_BATCH_SIZE=``) counts as "not set" rather than crashing in
    # int(""), matching how the request limit below treats empty values.
    _batch_size_env = os.getenv("EMBEDDING_BATCH_SIZE")
    EMBEDDING_BATCH_SIZE: int = (
        int(_batch_size_env)
        if (_batch_size_env or "").strip()
        else get_optimal_batch_size()
    )
    # EMBEDDING_SERVER_MAX_BATCH is consulted only when
    # EMBEDDING_SERVER_MAX_REQUEST_ITEMS is unset or empty.
    _request_limit_env = os.getenv("EMBEDDING_SERVER_MAX_REQUEST_ITEMS") or os.getenv(
        "EMBEDDING_SERVER_MAX_BATCH"
    )
    # Without an explicit limit, allow at least 32 items per request.
    EMBEDDING_SERVER_MAX_REQUEST_ITEMS: int = (
        int(_request_limit_env)
        if (_request_limit_env or "").strip()
        else max(32, EMBEDDING_BATCH_SIZE)
    )
    EMBEDDING_SERVER_API_KEYS: set[str] = _parse_api_keys(
        os.getenv("EMBEDDING_SERVER_API_KEYS")
    )

    # Database
    DATABASE_PATH: str = os.getenv("DATABASE_PATH", ":memory:")

    # Server
    # NOTE(review): if this is used as a bind address, "0.0.0.0" listens on
    # all IPv4 interfaces — confirm that default is intended.
    SERVER_HOST: str = os.getenv("SERVER_HOST", "0.0.0.0")
    # A blank SERVER_PORT falls back to 8000 instead of failing in int("").
    SERVER_PORT: int = int((os.getenv("SERVER_PORT") or "").strip() or "8000")

    @classmethod
    def from_env(cls) -> "Config":
        """Return a ``Config`` instance.

        The settings themselves are class attributes that were already read
        from the environment when the class body executed; this call does
        not re-read them.
        """
        return cls()


# Singleton instance
config = Config.from_env()
|