remote-embedding 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- remote_embedding-0.1.0/LICENSE +21 -0
- remote_embedding-0.1.0/PKG-INFO +121 -0
- remote_embedding-0.1.0/README.md +92 -0
- remote_embedding-0.1.0/pyproject.toml +48 -0
- remote_embedding-0.1.0/setup.cfg +4 -0
- remote_embedding-0.1.0/src/remote_embedding/__init__.py +6 -0
- remote_embedding-0.1.0/src/remote_embedding/__main__.py +7 -0
- remote_embedding-0.1.0/src/remote_embedding/app.py +158 -0
- remote_embedding-0.1.0/src/remote_embedding/remote.py +71 -0
- remote_embedding-0.1.0/src/remote_embedding.egg-info/PKG-INFO +121 -0
- remote_embedding-0.1.0/src/remote_embedding.egg-info/SOURCES.txt +13 -0
- remote_embedding-0.1.0/src/remote_embedding.egg-info/dependency_links.txt +1 -0
- remote_embedding-0.1.0/src/remote_embedding.egg-info/entry_points.txt +2 -0
- remote_embedding-0.1.0/src/remote_embedding.egg-info/requires.txt +7 -0
- remote_embedding-0.1.0/src/remote_embedding.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Meshkat Shariat Bagheri
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: remote-embedding
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A PyPI-ready FastAPI embedding service and LangChain-compatible remote client.
|
|
5
|
+
Author: Meshkat Shariat Bagheri
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/MeshkatShB/remote-embedding
|
|
8
|
+
Project-URL: Issues, https://github.com/MeshkatShB/remote-embedding/issues
|
|
9
|
+
Keywords: embeddings,fastapi,langchain,huggingface,api
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Framework :: FastAPI
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: fastapi>=0.115
|
|
22
|
+
Requires-Dist: langchain-core>=0.3
|
|
23
|
+
Requires-Dist: langchain-huggingface>=0.1.2
|
|
24
|
+
Requires-Dist: pydantic>=2.7
|
|
25
|
+
Requires-Dist: python-dotenv>=1.0
|
|
26
|
+
Requires-Dist: requests>=2.32
|
|
27
|
+
Requires-Dist: uvicorn>=0.30
|
|
28
|
+
Dynamic: license-file
|
|
29
|
+
|
|
30
|
+
# remote-embedding
|
|
31
|
+
|
|
32
|
+
`remote-embedding` packages two things together:
|
|
33
|
+
|
|
34
|
+
- A FastAPI server that exposes a `/embed` API backed by local Hugging Face models.
|
|
35
|
+
- A LangChain-compatible `RemoteEmbeddings` client that calls that server remotely.
|
|
36
|
+
|
|
37
|
+
## Install
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install remote-embedding
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Package Layout
|
|
44
|
+
|
|
45
|
+
The import package is `remote_embedding`.
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from remote_embedding import RemoteEmbeddings
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Run The Server
|
|
52
|
+
|
|
53
|
+
Set the environment variables your model needs.
|
|
54
|
+
|
|
55
|
+
PowerShell:
|
|
56
|
+
|
|
57
|
+
```powershell
|
|
58
|
+
$env:EMBEDDING_MODEL_NAME="BAAI/bge-base-en-v1.5"
|
|
59
|
+
$env:EMBEDDING_DIR="C:\path\to\model-cache"
|
|
60
|
+
$env:DEVICE="cpu"
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Bash:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
export EMBEDDING_MODEL_NAME=BAAI/bge-base-en-v1.5
|
|
67
|
+
export EMBEDDING_DIR=/path/to/model-cache
|
|
68
|
+
export DEVICE=cpu
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Start the API:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
remote-embedding-server
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Or:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
python -m remote_embedding
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Defaults:
|
|
84
|
+
|
|
85
|
+
- `HOST=0.0.0.0`
|
|
86
|
+
- `PORT=5055`
|
|
87
|
+
|
|
88
|
+
## Use The Client
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
from remote_embedding import RemoteEmbeddings
|
|
92
|
+
|
|
93
|
+
embeddings = RemoteEmbeddings(
|
|
94
|
+
base_url="http://127.0.0.1:5055",
|
|
95
|
+
model_name="BAAI/bge-base-en-v1.5",
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
docs = embeddings.embed_documents(["hello world", "remote embeddings"])
|
|
99
|
+
query = embeddings.embed_query("search text")
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Build For PyPI
|
|
103
|
+
|
|
104
|
+
Build distributions locally:
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
python -m pip install --upgrade build
|
|
108
|
+
python -m build
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
This creates:
|
|
112
|
+
|
|
113
|
+
- `dist/*.tar.gz`
|
|
114
|
+
- `dist/*.whl`
|
|
115
|
+
|
|
116
|
+
Upload with Twine:
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
python -m pip install --upgrade twine
|
|
120
|
+
python -m twine upload dist/*
|
|
121
|
+
```
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# remote-embedding
|
|
2
|
+
|
|
3
|
+
`remote-embedding` packages two things together:
|
|
4
|
+
|
|
5
|
+
- A FastAPI server that exposes a `/embed` API backed by local Hugging Face models.
|
|
6
|
+
- A LangChain-compatible `RemoteEmbeddings` client that calls that server remotely.
|
|
7
|
+
|
|
8
|
+
## Install
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
pip install remote-embedding
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Package Layout
|
|
15
|
+
|
|
16
|
+
The import package is `remote_embedding`.
|
|
17
|
+
|
|
18
|
+
```python
|
|
19
|
+
from remote_embedding import RemoteEmbeddings
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Run The Server
|
|
23
|
+
|
|
24
|
+
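Set the environment variables your model needs. The server loads models with `local_files_only=True`, so the model files must already be available locally (for example in `EMBEDDING_DIR`); they are not downloaded on demand.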
|
|
25
|
+
|
|
26
|
+
PowerShell:
|
|
27
|
+
|
|
28
|
+
```powershell
|
|
29
|
+
$env:EMBEDDING_MODEL_NAME="BAAI/bge-base-en-v1.5"
|
|
30
|
+
$env:EMBEDDING_DIR="C:\path\to\model-cache"
|
|
31
|
+
$env:DEVICE="cpu"
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Bash:
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
export EMBEDDING_MODEL_NAME=BAAI/bge-base-en-v1.5
|
|
38
|
+
export EMBEDDING_DIR=/path/to/model-cache
|
|
39
|
+
export DEVICE=cpu
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Start the API:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
remote-embedding-server
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Or:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
python -m remote_embedding
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Defaults:
|
|
55
|
+
|
|
56
|
+
- `HOST=0.0.0.0`
|
|
57
|
+
- `PORT=5055`
|
|
58
|
+
|
|
59
|
+
## Use The Client
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
from remote_embedding import RemoteEmbeddings
|
|
63
|
+
|
|
64
|
+
embeddings = RemoteEmbeddings(
|
|
65
|
+
base_url="http://127.0.0.1:5055",
|
|
66
|
+
model_name="BAAI/bge-base-en-v1.5",
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
docs = embeddings.embed_documents(["hello world", "remote embeddings"])
|
|
70
|
+
query = embeddings.embed_query("search text")
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Build For PyPI
|
|
74
|
+
|
|
75
|
+
Build distributions locally:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
python -m pip install --upgrade build
|
|
79
|
+
python -m build
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
This creates:
|
|
83
|
+
|
|
84
|
+
- `dist/*.tar.gz`
|
|
85
|
+
- `dist/*.whl`
|
|
86
|
+
|
|
87
|
+
Upload with Twine:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
python -m pip install --upgrade twine
|
|
91
|
+
python -m twine upload dist/*
|
|
92
|
+
```
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
# setuptools 77+ is required for the SPDX `license = "MIT"` string and the
# `license-files` key used under [project] (PEP 639).
requires = ["setuptools>=77", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "remote-embedding"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "A PyPI-ready FastAPI embedding service and LangChain-compatible remote client."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
license-files = ["LICENSE"]
|
|
13
|
+
authors = [
|
|
14
|
+
{ name = "Meshkat Shariat Bagheri" }
|
|
15
|
+
]
|
|
16
|
+
keywords = ["embeddings", "fastapi", "langchain", "huggingface", "api"]
|
|
17
|
+
classifiers = [
|
|
18
|
+
"Development Status :: 3 - Alpha",
|
|
19
|
+
"Intended Audience :: Developers",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.10",
|
|
22
|
+
"Programming Language :: Python :: 3.11",
|
|
23
|
+
"Programming Language :: Python :: 3.12",
|
|
24
|
+
"Framework :: FastAPI",
|
|
25
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
26
|
+
]
|
|
27
|
+
dependencies = [
|
|
28
|
+
"fastapi>=0.115",
|
|
29
|
+
"langchain-core>=0.3",
|
|
30
|
+
"langchain-huggingface>=0.1.2",
|
|
31
|
+
"pydantic>=2.7",
|
|
32
|
+
"python-dotenv>=1.0",
|
|
33
|
+
"requests>=2.32",
|
|
34
|
+
"uvicorn>=0.30",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
[project.urls]
|
|
38
|
+
Homepage = "https://github.com/MeshkatShB/remote-embedding"
|
|
39
|
+
Issues = "https://github.com/MeshkatShB/remote-embedding/issues"
|
|
40
|
+
|
|
41
|
+
[project.scripts]
|
|
42
|
+
remote-embedding-server = "remote_embedding.app:main"
|
|
43
|
+
|
|
44
|
+
[tool.setuptools]
|
|
45
|
+
package-dir = { "" = "src" }
|
|
46
|
+
|
|
47
|
+
[tool.setuptools.packages.find]
|
|
48
|
+
where = ["src"]
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"""FastAPI application serving remote embedding inference."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import os
|
|
5
|
+
from contextlib import asynccontextmanager
|
|
6
|
+
from typing import Literal, Optional, Union
|
|
7
|
+
|
|
8
|
+
import uvicorn
|
|
9
|
+
from dotenv import load_dotenv
|
|
10
|
+
from fastapi import FastAPI, HTTPException
|
|
11
|
+
from langchain_huggingface import HuggingFaceEmbeddings
|
|
12
|
+
from pydantic import BaseModel, Field
|
|
13
|
+
|
|
14
|
+
# load_dotenv() runs before the lookups below so that they see any values it
# adds to the process environment.
load_dotenv()

# Bind address and port passed to uvicorn by main().
DEFAULT_HOST = os.environ.get("HOST", "0.0.0.0")
DEFAULT_PORT = int(os.environ.get("PORT", "5055"))

# Model settings; each one is None when its variable is unset.
EMBEDDING_MODEL_NAME = os.environ.get("EMBEDDING_MODEL_NAME")
EMBEDDING_DIR = os.environ.get("EMBEDDING_DIR")
DEVICE = os.environ.get("DEVICE")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class EmbeddingRequest(BaseModel):
    # Request body accepted by POST /embed.

    # The "model_name" field begins with "model_", a prefix that some
    # Pydantic 2.x releases treat as a protected namespace and warn about at
    # class-creation time. Clearing the protected namespaces keeps the public
    # field name unchanged while avoiding that warning.
    model_config = {"protected_namespaces": ()}

    # A single string, or a list of strings, to embed.
    input: Union[str, list[str]] = Field(..., description="String or list of strings")
    # "documents" embeds every input; "query" is only accepted with exactly
    # one input string (enforced by the /embed handler).
    mode: Literal["documents", "query"] = "documents"
    # Model to use for this request; the handler falls back to
    # EMBEDDING_MODEL_NAME when this is not provided.
    model_name: Optional[str] = None
    # NOTE(review): accepted in the request body but not read by the /embed
    # handler in this file.
    instruction: Optional[str] = None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class EmbeddingResponse(BaseModel):
    # Response body returned by POST /embed.

    # Resolved name of the model that produced the vectors.
    model: str
    # Length of the first vector in `data` (0 when `data` is empty).
    dimensions: int
    # Number of vectors in `data`.
    count: int
    # One embedding vector per input string.
    data: list[list[float]]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class HealthResponse(BaseModel):
    # Response body returned by GET /health.

    # Set to "ok" by the health handler.
    status: str
    # Configured model name, or the first cached model name when none is
    # configured.
    model: str
    # Value of the DEVICE environment variable; None when it is unset.
    device: Optional[str]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class EmbeddingService:
    """Cache of HuggingFace embedding models with serialized use.

    Constructed models are kept in ``embed_models`` keyed by their resolved
    name. The async entry points hold ``lock`` around both model loading and
    the embedding call, so only one of those operations runs at a time.
    """

    def __init__(self) -> None:
        self.embed_models: dict[str, HuggingFaceEmbeddings] = {}
        self.lock = asyncio.Lock()

    def _resolve_model_name(self, model_name: Optional[str] = None) -> str:
        """Pick the model name to use and strip surrounding whitespace.

        The first truthy value of ``model_name`` and ``EMBEDDING_MODEL_NAME``
        is used.

        Raises:
            RuntimeError: If the chosen value is empty after stripping.
        """
        resolved_model_name = (model_name or EMBEDDING_MODEL_NAME or "").strip()
        if not resolved_model_name:
            raise RuntimeError(
                "No embedding model specified. Set EMBEDDING_MODEL_NAME or pass model_name in the request."
            )
        return resolved_model_name

    def load(self, model_name: Optional[str] = None) -> HuggingFaceEmbeddings:
        """Return the model for ``model_name``, constructing it on first use.

        This is a synchronous call. From async code, go through
        ``embed_documents`` / ``embed_query``, which run it in a worker
        thread.

        Raises:
            RuntimeError: If no model name can be resolved.
        """
        resolved_model_name = self._resolve_model_name(model_name)
        if resolved_model_name in self.embed_models:
            return self.embed_models[resolved_model_name]

        # NOTE(review): trust_remote_code=True is applied to every model,
        # including names supplied by API clients; local_files_only=True
        # restricts loading to files already available locally. Confirm this
        # combination is intended for the deployment.
        embed_model = HuggingFaceEmbeddings(
            model_name=resolved_model_name,
            model_kwargs={
                "device": DEVICE,
                "local_files_only": True,
                "trust_remote_code": True,
            },
            cache_folder=EMBEDDING_DIR,
        )
        self.embed_models[resolved_model_name] = embed_model
        return embed_model

    async def _acquire_model(self, model_name: Optional[str]) -> HuggingFaceEmbeddings:
        """Return the model for ``model_name`` without blocking the event loop.

        Already-cached models are returned directly; otherwise ``load`` runs
        in a worker thread. Callers must hold ``self.lock`` so that two
        requests never construct a model or mutate the cache concurrently.
        """
        resolved_model_name = self._resolve_model_name(model_name)
        cached = self.embed_models.get(resolved_model_name)
        if cached is not None:
            return cached
        return await asyncio.to_thread(self.load, resolved_model_name)

    async def embed_documents(
        self,
        texts: list[str],
        model_name: Optional[str] = None,
    ) -> list[list[float]]:
        """Embed ``texts`` with the resolved model, one vector per text."""
        # Serialize model loading and GPU access to avoid VRAM spikes from
        # concurrent requests.
        async with self.lock:
            embed_model = await self._acquire_model(model_name)
            return await asyncio.to_thread(embed_model.embed_documents, texts)

    async def embed_query(self, text: str, model_name: Optional[str] = None) -> list[float]:
        """Embed a single query string with the resolved model."""
        async with self.lock:
            embed_model = await self._acquire_model(model_name)
            return await asyncio.to_thread(embed_model.embed_query, text)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# Module-level service instance shared by the lifespan hook and the route
# handlers.
svc = EmbeddingService()
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@asynccontextmanager
async def lifespan(_: FastAPI):
    """Preload the configured default model before the app starts serving.

    When EMBEDDING_MODEL_NAME is set, ``svc.load()`` runs once at startup;
    any exception it raises propagates and aborts startup. Nothing is done
    on shutdown.
    """
    # Treat a whitespace-only EMBEDDING_MODEL_NAME as unset, matching
    # health() and embed(). A bare truthiness test would pass such a value to
    # svc.load(), which raises RuntimeError for an empty resolved name.
    if (EMBEDDING_MODEL_NAME or "").strip():
        svc.load()
    yield
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# ASGI application object; main() points uvicorn at it through the import
# string "remote_embedding.app:app".
app = FastAPI(
    title="Shared Embedding Service",
    version="0.1.0",
    lifespan=lifespan,
)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@app.get("/health", response_model=HealthResponse)
async def health() -> HealthResponse:
    """Report the service status together with the model name and device.

    The configured EMBEDDING_MODEL_NAME is reported when it is non-empty;
    otherwise the first model name in the service cache is used.

    Raises:
        HTTPException: 503 when neither source yields a model name.
    """
    model_name = (EMBEDDING_MODEL_NAME or "").strip()

    if not model_name:
        # No configured default: fall back to the first cached model, if any.
        for cached_name in svc.embed_models:
            model_name = cached_name
            break

    if not model_name:
        raise HTTPException(status_code=503, detail="Model not loaded")

    return HealthResponse(status="ok", model=model_name, device=DEVICE)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
@app.post("/embed", response_model=EmbeddingResponse)
async def embed(req: EmbeddingRequest) -> EmbeddingResponse:
    """Embed the request input and return the vectors with summary fields.

    Raises:
        HTTPException: 400 when the input is empty or contains a blank
            string, when no model name can be resolved, or when
            ``mode="query"`` is used with more than one input; 500 when the
            embedding call or response construction fails.
    """
    # Normalize a bare string to a one-element list.
    texts = req.input if not isinstance(req.input, str) else [req.input]

    inputs_ok = bool(texts) and all(
        isinstance(text, str) and text.strip() for text in texts
    )
    if not inputs_ok:
        raise HTTPException(status_code=400, detail="Input must contain non-empty strings")

    resolved_model_name = (req.model_name or EMBEDDING_MODEL_NAME or "").strip()
    if not resolved_model_name:
        raise HTTPException(
            status_code=400,
            detail="No embedding model specified. Set EMBEDDING_MODEL_NAME or pass model_name.",
        )

    query_mode = req.mode == "query"
    if query_mode and len(texts) != 1:
        raise HTTPException(
            status_code=400,
            detail="mode='query' requires a single input string",
        )

    try:
        if query_mode:
            single_vector = await svc.embed_query(texts[0], model_name=resolved_model_name)
            vectors = [single_vector]
        else:
            vectors = await svc.embed_documents(texts, model_name=resolved_model_name)

        return EmbeddingResponse(
            model=resolved_model_name,
            dimensions=len(vectors[0]) if vectors else 0,
            count=len(vectors),
            data=vectors,
        )
    except HTTPException:
        # Let HTTP errors through unchanged instead of wrapping them as 500s.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Embedding failed: {exc}") from exc
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def main() -> None:
    """Console-script entry point: serve the app with uvicorn.

    Binds to DEFAULT_HOST and DEFAULT_PORT, which are read from the HOST and
    PORT environment variables at import time.
    """
    uvicorn.run(
        "remote_embedding.app:app",
        host=DEFAULT_HOST,
        port=DEFAULT_PORT,
    )
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Client for the remote embedding FastAPI service."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
from langchain_core.embeddings import Embeddings
|
|
7
|
+
from requests.adapters import HTTPAdapter
|
|
8
|
+
from urllib3.util.retry import Retry
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class RemoteEmbeddings(Embeddings):
    """LangChain ``Embeddings`` client for the remote ``/embed`` HTTP endpoint.

    Requests go through a ``requests.Session`` whose adapter retries up to
    three times (backoff factor 0.5) on 429/500/502/503/504 responses for
    POST and GET.

    Args:
        base_url: Root URL of the embedding server; a trailing ``/`` is removed.
        timeout: Timeout passed to each HTTP request.
        expected_dimensions: When set, every returned vector must have this
            length or ``ValueError`` is raised.
        model_name: When set, sent as ``model_name`` with every request.
    """

    def __init__(
        self,
        base_url: str = "http://127.0.0.1:5055",
        timeout: int = 300,
        expected_dimensions: Optional[int] = None,
        model_name: Optional[str] = None,
    ) -> None:
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.expected_dimensions = expected_dimensions
        self.model_name = model_name

        self.session = requests.Session()
        retries = Retry(
            total=3,
            backoff_factor=0.5,
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["POST", "GET"],
        )
        adapter = HTTPAdapter(max_retries=retries)
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)

    def _check_dim(self, vectors: list[list[float]]) -> None:
        """Validate vector lengths against ``expected_dimensions``.

        Every vector is checked, not only the first, so a single malformed
        entry in a batch is caught.

        Raises:
            ValueError: If any vector's length differs from the expected one.
        """
        if self.expected_dimensions is None:
            return
        for vector in vectors:
            if len(vector) != self.expected_dimensions:
                raise ValueError(
                    f"Embedding dimension mismatch: expected {self.expected_dimensions}, got {len(vector)}"
                )

    def _post_embed(self, payload: dict) -> list[list[float]]:
        """POST ``payload`` to ``/embed`` and return the validated ``data`` list.

        Adds ``model_name`` to the payload when one was configured. HTTP
        error statuses are raised via ``raise_for_status``.
        """
        if self.model_name:
            payload["model_name"] = self.model_name

        response = self.session.post(
            f"{self.base_url}/embed",
            json=payload,
            timeout=self.timeout,
        )
        response.raise_for_status()
        vectors = response.json()["data"]
        self._check_dim(vectors)
        return vectors

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Embed ``texts`` remotely and return one vector per text.

        An empty ``texts`` returns ``[]`` without contacting the server,
        which rejects empty input with HTTP 400.
        """
        if not texts:
            return []
        return self._post_embed({"input": list(texts), "mode": "documents"})

    def embed_query(self, text: str) -> list[float]:
        """Embed a single query string remotely and return its vector."""
        return self._post_embed({"input": text, "mode": "query"})[0]
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: remote-embedding
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A PyPI-ready FastAPI embedding service and LangChain-compatible remote client.
|
|
5
|
+
Author: Meshkat Shariat Bagheri
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/MeshkatShB/remote-embedding
|
|
8
|
+
Project-URL: Issues, https://github.com/MeshkatShB/remote-embedding/issues
|
|
9
|
+
Keywords: embeddings,fastapi,langchain,huggingface,api
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Framework :: FastAPI
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: fastapi>=0.115
|
|
22
|
+
Requires-Dist: langchain-core>=0.3
|
|
23
|
+
Requires-Dist: langchain-huggingface>=0.1.2
|
|
24
|
+
Requires-Dist: pydantic>=2.7
|
|
25
|
+
Requires-Dist: python-dotenv>=1.0
|
|
26
|
+
Requires-Dist: requests>=2.32
|
|
27
|
+
Requires-Dist: uvicorn>=0.30
|
|
28
|
+
Dynamic: license-file
|
|
29
|
+
|
|
30
|
+
# remote-embedding
|
|
31
|
+
|
|
32
|
+
`remote-embedding` packages two things together:
|
|
33
|
+
|
|
34
|
+
- A FastAPI server that exposes a `/embed` API backed by local Hugging Face models.
|
|
35
|
+
- A LangChain-compatible `RemoteEmbeddings` client that calls that server remotely.
|
|
36
|
+
|
|
37
|
+
## Install
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install remote-embedding
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Package Layout
|
|
44
|
+
|
|
45
|
+
The import package is `remote_embedding`.
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from remote_embedding import RemoteEmbeddings
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Run The Server
|
|
52
|
+
|
|
53
|
+
Set the environment variables your model needs.
|
|
54
|
+
|
|
55
|
+
PowerShell:
|
|
56
|
+
|
|
57
|
+
```powershell
|
|
58
|
+
$env:EMBEDDING_MODEL_NAME="BAAI/bge-base-en-v1.5"
|
|
59
|
+
$env:EMBEDDING_DIR="C:\path\to\model-cache"
|
|
60
|
+
$env:DEVICE="cpu"
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Bash:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
export EMBEDDING_MODEL_NAME=BAAI/bge-base-en-v1.5
|
|
67
|
+
export EMBEDDING_DIR=/path/to/model-cache
|
|
68
|
+
export DEVICE=cpu
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Start the API:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
remote-embedding-server
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Or:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
python -m remote_embedding
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Defaults:
|
|
84
|
+
|
|
85
|
+
- `HOST=0.0.0.0`
|
|
86
|
+
- `PORT=5055`
|
|
87
|
+
|
|
88
|
+
## Use The Client
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
from remote_embedding import RemoteEmbeddings
|
|
92
|
+
|
|
93
|
+
embeddings = RemoteEmbeddings(
|
|
94
|
+
base_url="http://127.0.0.1:5055",
|
|
95
|
+
model_name="BAAI/bge-base-en-v1.5",
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
docs = embeddings.embed_documents(["hello world", "remote embeddings"])
|
|
99
|
+
query = embeddings.embed_query("search text")
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Build For PyPI
|
|
103
|
+
|
|
104
|
+
Build distributions locally:
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
python -m pip install --upgrade build
|
|
108
|
+
python -m build
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
This creates:
|
|
112
|
+
|
|
113
|
+
- `dist/*.tar.gz`
|
|
114
|
+
- `dist/*.whl`
|
|
115
|
+
|
|
116
|
+
Upload with Twine:
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
python -m pip install --upgrade twine
|
|
120
|
+
python -m twine upload dist/*
|
|
121
|
+
```
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/remote_embedding/__init__.py
|
|
5
|
+
src/remote_embedding/__main__.py
|
|
6
|
+
src/remote_embedding/app.py
|
|
7
|
+
src/remote_embedding/remote.py
|
|
8
|
+
src/remote_embedding.egg-info/PKG-INFO
|
|
9
|
+
src/remote_embedding.egg-info/SOURCES.txt
|
|
10
|
+
src/remote_embedding.egg-info/dependency_links.txt
|
|
11
|
+
src/remote_embedding.egg-info/entry_points.txt
|
|
12
|
+
src/remote_embedding.egg-info/requires.txt
|
|
13
|
+
src/remote_embedding.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
remote_embedding
|