simplevecdb 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ from __future__ import annotations
2
+
3
+ from .types import Document, DistanceStrategy
4
+ from .core import VectorDB, VectorCollection, Quantization, get_optimal_batch_size
5
+ from .config import config
6
+ from .integrations.langchain import SimpleVecDBVectorStore
7
+ from .integrations.llamaindex import SimpleVecDBLlamaStore
8
+
9
+ __version__ = "1.0.0"
10
+ __all__ = [
11
+ "VectorDB",
12
+ "VectorCollection",
13
+ "Quantization",
14
+ "Document",
15
+ "DistanceStrategy",
16
+ "SimpleVecDBVectorStore",
17
+ "SimpleVecDBLlamaStore",
18
+ "config",
19
+ "get_optimal_batch_size",
20
+ ]
simplevecdb/config.py ADDED
@@ -0,0 +1,105 @@
1
+ """Environment configuration for SimpleVecDB."""
2
+
3
+ import os
4
+ from pathlib import Path
5
+ from dotenv import load_dotenv
6
+
7
+ from .core import get_optimal_batch_size
8
+
9
# Populate the process environment from a ``.env`` file before any setting
# below is read. The file is looked up three directory levels above this
# module.
# NOTE(review): that location is presumably the project root only for a
# source checkout; confirm what it resolves to for an installed package.
env_path = Path(__file__).parent.parent.parent.joinpath(".env")
load_dotenv(dotenv_path=env_path)
12
+
13
+
14
+ def _parse_registry(raw: str | None, default_model: str) -> dict[str, str]:
15
+ """Convert comma-separated alias=repo entries into a registry dict."""
16
+ registry: dict[str, str] = {}
17
+ if raw:
18
+ for entry in raw.split(","):
19
+ entry = entry.strip()
20
+ if not entry:
21
+ continue
22
+ if "=" in entry:
23
+ alias, repo = entry.split("=", 1)
24
+ registry[alias.strip()] = repo.strip()
25
+ else:
26
+ registry[entry] = entry
27
+ registry.setdefault("default", default_model)
28
+ return registry
29
+
30
+
31
+ def _parse_api_keys(raw: str | None) -> set[str]:
32
+ """Return a sanitized set of API keys from comma-separated env values."""
33
+ if not raw:
34
+ return set()
35
+ return {token.strip() for token in raw.split(",") if token.strip()}
36
+
37
+
38
+ def _parse_bool_env(raw: str | None, default: bool) -> bool:
39
+ """Handle common truthy/falsey env strings with a fallback default."""
40
+ if raw is None:
41
+ return default
42
+ return raw.strip().lower() not in {"0", "false", "no", "off"}
43
+
44
+
45
def _blank_to_none(raw: str | None) -> str | None:
    """Return ``raw`` without surrounding whitespace, or None if unset/blank."""
    if raw is None:
        return None
    return raw.strip() or None


class Config:
    """
    Configuration settings for SimpleVecDB, loaded from environment variables.

    All values are read once, when this class body executes (at import time),
    and stored as class attributes. Numeric variables that are set but blank
    (e.g. ``SERVER_PORT=``) are treated as unset and fall back to their
    defaults instead of failing integer conversion.

    Attributes:
        EMBEDDING_MODEL: The default embedding model repo id or alias.
        EMBEDDING_CACHE_DIR: Directory path for caching embedding models.
        EMBEDDING_MODEL_REGISTRY: Mapping of model aliases to repo ids; always
            has a "default" entry (EMBEDDING_MODEL unless overridden).
        EMBEDDING_MODEL_REGISTRY_LOCKED: If True, only allow listed models.
            Defaults to True.
        EMBEDDING_BATCH_SIZE: Batch size for embedding requests; taken from
            get_optimal_batch_size() when the variable is not set.
        EMBEDDING_SERVER_MAX_REQUEST_ITEMS: Max items per embedding request.
            Also read from EMBEDDING_SERVER_MAX_BATCH; defaults to
            max(32, EMBEDDING_BATCH_SIZE).
        EMBEDDING_SERVER_API_KEYS: Set of valid API keys for the embedding server.
        DATABASE_PATH: Path to the SimpleVecDB database file.
        SERVER_HOST: Host address for the SimpleVecDB server.
        SERVER_PORT: Port number for the SimpleVecDB server.
    """

    # Embedding Model
    EMBEDDING_MODEL: str = os.getenv("EMBEDDING_MODEL", "TaylorAI/bge-micro-v2")
    EMBEDDING_CACHE_DIR: str = os.getenv(
        "EMBEDDING_CACHE_DIR", str(Path.home() / ".cache" / "simplevecdb")
    )
    _registry_env = os.getenv("EMBEDDING_MODEL_REGISTRY")
    EMBEDDING_MODEL_REGISTRY: dict[str, str] = _parse_registry(
        _registry_env, EMBEDDING_MODEL
    )
    EMBEDDING_MODEL_REGISTRY_LOCKED: bool = _parse_bool_env(
        os.getenv("EMBEDDING_MODEL_REGISTRY_LOCKED"), True
    )
    # Auto-detect optimal batch size if not explicitly set. The detection
    # call is only made when no usable value is present in the environment.
    _batch_size_env = _blank_to_none(os.getenv("EMBEDDING_BATCH_SIZE"))
    EMBEDDING_BATCH_SIZE: int = (
        int(_batch_size_env)
        if _batch_size_env is not None
        else get_optimal_batch_size()
    )
    # The first non-blank of the two variable names wins.
    _request_limit_env = _blank_to_none(
        os.getenv("EMBEDDING_SERVER_MAX_REQUEST_ITEMS")
    ) or _blank_to_none(os.getenv("EMBEDDING_SERVER_MAX_BATCH"))
    EMBEDDING_SERVER_MAX_REQUEST_ITEMS: int = (
        int(_request_limit_env) if _request_limit_env else max(32, EMBEDDING_BATCH_SIZE)
    )
    EMBEDDING_SERVER_API_KEYS: set[str] = _parse_api_keys(
        os.getenv("EMBEDDING_SERVER_API_KEYS")
    )

    # Database
    DATABASE_PATH: str = os.getenv("DATABASE_PATH", ":memory:")

    # Server
    SERVER_HOST: str = os.getenv("SERVER_HOST", "0.0.0.0")
    SERVER_PORT: int = int(_blank_to_none(os.getenv("SERVER_PORT")) or "8000")

    @classmethod
    def from_env(cls) -> "Config":
        """Return a new instance exposing the class-level settings.

        The environment is not re-read here: the attributes were already
        computed when the class body ran.
        """
        return cls()
102
+
103
+
104
# Shared module-level settings object, created once when this module is
# first imported.
config: Config = Config.from_env()