remote-embedding 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {remote_embedding-0.2.0 → remote_embedding-0.3.0}/LICENSE +21 -21
- {remote_embedding-0.2.0/src/remote_embedding.egg-info → remote_embedding-0.3.0}/PKG-INFO +248 -236
- {remote_embedding-0.2.0 → remote_embedding-0.3.0}/README.md +13 -1
- {remote_embedding-0.2.0 → remote_embedding-0.3.0}/pyproject.toml +1 -1
- {remote_embedding-0.2.0 → remote_embedding-0.3.0}/setup.cfg +4 -4
- remote_embedding-0.3.0/src/remote_embedding/__init__.py +12 -0
- {remote_embedding-0.2.0 → remote_embedding-0.3.0}/src/remote_embedding/app.py +128 -18
- {remote_embedding-0.2.0 → remote_embedding-0.3.0/src/remote_embedding.egg-info}/PKG-INFO +248 -236
- remote_embedding-0.2.0/src/remote_embedding/__init__.py +0 -6
- {remote_embedding-0.2.0 → remote_embedding-0.3.0}/src/remote_embedding/__main__.py +0 -0
- {remote_embedding-0.2.0 → remote_embedding-0.3.0}/src/remote_embedding/remote.py +0 -0
- {remote_embedding-0.2.0 → remote_embedding-0.3.0}/src/remote_embedding.egg-info/SOURCES.txt +0 -0
- {remote_embedding-0.2.0 → remote_embedding-0.3.0}/src/remote_embedding.egg-info/dependency_links.txt +0 -0
- {remote_embedding-0.2.0 → remote_embedding-0.3.0}/src/remote_embedding.egg-info/entry_points.txt +0 -0
- {remote_embedding-0.2.0 → remote_embedding-0.3.0}/src/remote_embedding.egg-info/requires.txt +0 -0
- {remote_embedding-0.2.0 → remote_embedding-0.3.0}/src/remote_embedding.egg-info/top_level.txt +0 -0
|
@@ -1,21 +1,21 @@
|
|
|
1
|
-
MIT License
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2026 Meshkat Shariat Bagheri
|
|
4
|
-
|
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
-
in the Software without restriction, including without limitation the rights
|
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
-
furnished to do so, subject to the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be included in all
|
|
13
|
-
copies or substantial portions of the Software.
|
|
14
|
-
|
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Meshkat Shariat Bagheri
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -1,236 +1,248 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: remote-embedding
|
|
3
|
-
Version: 0.2.0
|
|
4
|
-
Summary: A shared FastAPI embedding service and LangChain-compatible remote client for reusing one embedding model across multiple applications and lowering VRAM usage on limited GPUs.
|
|
5
|
-
Author: Meshkat Shariat Bagheri
|
|
6
|
-
License-Expression: MIT
|
|
7
|
-
Project-URL: Homepage, https://github.com/MeshkatShB/remote-embedding
|
|
8
|
-
Project-URL: Issues, https://github.com/MeshkatShB/remote-embedding/issues
|
|
9
|
-
Keywords: embeddings,fastapi,langchain,huggingface,api
|
|
10
|
-
Classifier: Development Status :: 3 - Alpha
|
|
11
|
-
Classifier: Intended Audience :: Developers
|
|
12
|
-
Classifier: Programming Language :: Python :: 3
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
-
Classifier: Framework :: FastAPI
|
|
17
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
-
Requires-Python: >=3.10
|
|
19
|
-
Description-Content-Type: text/markdown
|
|
20
|
-
License-File: LICENSE
|
|
21
|
-
Requires-Dist: fastapi>=0.115
|
|
22
|
-
Requires-Dist: langchain-core>=0.3
|
|
23
|
-
Requires-Dist: langchain-huggingface>=0.1.2
|
|
24
|
-
Requires-Dist: pydantic>=2.7
|
|
25
|
-
Requires-Dist: python-dotenv>=1.0
|
|
26
|
-
Requires-Dist: requests>=2.32
|
|
27
|
-
Requires-Dist: uvicorn>=0.30
|
|
28
|
-
Dynamic: license-file
|
|
29
|
-
|
|
30
|
-
# remote-embedding
|
|
31
|
-
|
|
32
|
-
`remote-embedding` packages two things together:
|
|
33
|
-
|
|
34
|
-
- A FastAPI server that exposes a `/embed` API backed by local Hugging Face models.
|
|
35
|
-
- A LangChain-compatible `RemoteEmbeddings` client that calls that server remotely.
|
|
36
|
-
|
|
37
|
-
This lets multiple applications share a single loaded embedding model instance instead of each process loading its own copy. On constrained GPUs, that reduces duplicated VRAM usage and makes it easier to serve embeddings from limited hardware.
|
|
38
|
-
|
|
39
|
-
## Install
|
|
40
|
-
|
|
41
|
-
```bash
|
|
42
|
-
pip install remote-embedding
|
|
43
|
-
```
|
|
44
|
-
|
|
45
|
-
## Package Layout
|
|
46
|
-
|
|
47
|
-
The import package is `remote_embedding`.
|
|
48
|
-
|
|
49
|
-
```python
|
|
50
|
-
from remote_embedding import RemoteEmbeddings
|
|
51
|
-
```
|
|
52
|
-
|
|
53
|
-
## Run The Server
|
|
54
|
-
|
|
55
|
-
Set the environment variables your model needs. You can copy values from `.env.example` into your own `.env` file, or set them directly in the shell.
|
|
56
|
-
|
|
57
|
-
PowerShell:
|
|
58
|
-
|
|
59
|
-
```powershell
|
|
60
|
-
$env:EMBEDDING_MODEL_NAME="BAAI/bge-base-en-v1.5"
|
|
61
|
-
$env:EMBEDDING_DIR="C:\\path\\to\\model-cache"
|
|
62
|
-
$env:DEVICE="cpu"
|
|
63
|
-
$env:MODEL_KWARGS='{"local_files_only": true, "trust_remote_code": true}'
|
|
64
|
-
$env:ENCODE_KWARGS='{"normalize_embeddings": true}'
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
export MODEL_KWARGS='{"local_files_only": true, "trust_remote_code": true}'
|
|
74
|
-
export ENCODE_KWARGS='{"normalize_embeddings": true}'
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
--encode-kwargs '{"normalize_embeddings": true}'
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
- `HOST`: bind address for the FastAPI server
|
|
114
|
-
- `PORT`: bind port for the FastAPI server
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
- `base_url`: full server URL, such as `http://127.0.0.1:5055`
|
|
124
|
-
- `timeout`: request timeout in seconds
|
|
125
|
-
- `expected_dimensions`: optional validation for returned vector size
|
|
126
|
-
- `model_name`: optional per-client default model name sent with each request
|
|
127
|
-
- `embedding_dir`: optional per-client cache/model directory override sent with each request
|
|
128
|
-
- `model_kwargs`: optional JSON-serializable dict sent to the server and merged into `model_kwargs`
|
|
129
|
-
- `encode_kwargs`: optional JSON-serializable dict sent to the server as `encode_kwargs`
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
```python
|
|
173
|
-
from
|
|
174
|
-
|
|
175
|
-
embed_model =
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
```bash
|
|
202
|
-
python -m pip install --upgrade
|
|
203
|
-
python -m
|
|
204
|
-
```
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
python -m
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
```
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: remote-embedding
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: A shared FastAPI embedding service and LangChain-compatible remote client for reusing one embedding model across multiple applications and lowering VRAM usage on limited GPUs.
|
|
5
|
+
Author: Meshkat Shariat Bagheri
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/MeshkatShB/remote-embedding
|
|
8
|
+
Project-URL: Issues, https://github.com/MeshkatShB/remote-embedding/issues
|
|
9
|
+
Keywords: embeddings,fastapi,langchain,huggingface,api
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Framework :: FastAPI
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: fastapi>=0.115
|
|
22
|
+
Requires-Dist: langchain-core>=0.3
|
|
23
|
+
Requires-Dist: langchain-huggingface>=0.1.2
|
|
24
|
+
Requires-Dist: pydantic>=2.7
|
|
25
|
+
Requires-Dist: python-dotenv>=1.0
|
|
26
|
+
Requires-Dist: requests>=2.32
|
|
27
|
+
Requires-Dist: uvicorn>=0.30
|
|
28
|
+
Dynamic: license-file
|
|
29
|
+
|
|
30
|
+
# remote-embedding
|
|
31
|
+
|
|
32
|
+
`remote-embedding` packages two things together:
|
|
33
|
+
|
|
34
|
+
- A FastAPI server that exposes a `/embed` API backed by local Hugging Face models.
|
|
35
|
+
- A LangChain-compatible `RemoteEmbeddings` client that calls that server remotely.
|
|
36
|
+
|
|
37
|
+
This lets multiple applications share a single loaded embedding model instance instead of each process loading its own copy. On constrained GPUs, that reduces duplicated VRAM usage and makes it easier to serve embeddings from limited hardware.
|
|
38
|
+
|
|
39
|
+
## Install
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install remote-embedding
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Package Layout
|
|
46
|
+
|
|
47
|
+
The import package is `remote_embedding`.
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from remote_embedding import RemoteEmbeddings
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Run The Server
|
|
54
|
+
|
|
55
|
+
Set the environment variables your model needs. You can copy values from `.env.example` into your own `.env` file, or set them directly in the shell.
|
|
56
|
+
|
|
57
|
+
PowerShell:
|
|
58
|
+
|
|
59
|
+
```powershell
|
|
60
|
+
$env:EMBEDDING_MODEL_NAME="BAAI/bge-base-en-v1.5"
|
|
61
|
+
$env:EMBEDDING_DIR="C:\\path\\to\\model-cache"
|
|
62
|
+
$env:DEVICE="cpu"
|
|
63
|
+
$env:MAX_LOADED_MODELS="1"
|
|
64
|
+
$env:MAX_INPUTS_PER_REQUEST="128"
|
|
65
|
+
$env:EMBEDDING_BATCH_SIZE="32"
|
|
66
|
+
$env:MODEL_KWARGS='{"local_files_only": true, "trust_remote_code": true}'
|
|
67
|
+
$env:ENCODE_KWARGS='{"normalize_embeddings": true}'
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Bash:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
export EMBEDDING_MODEL_NAME=BAAI/bge-base-en-v1.5
|
|
74
|
+
export EMBEDDING_DIR=/path/to/model-cache
|
|
75
|
+
export DEVICE=cpu
|
|
76
|
+
export MAX_LOADED_MODELS=1
|
|
77
|
+
export MAX_INPUTS_PER_REQUEST=128
|
|
78
|
+
export EMBEDDING_BATCH_SIZE=32
|
|
79
|
+
export MODEL_KWARGS='{"local_files_only": true, "trust_remote_code": true}'
|
|
80
|
+
export ENCODE_KWARGS='{"normalize_embeddings": true}'
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
You can also configure the server with CLI flags:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
remote-embedding-server \
|
|
87
|
+
--host 0.0.0.0 \
|
|
88
|
+
--port 5055 \
|
|
89
|
+
--model-name BAAI/bge-base-en-v1.5 \
|
|
90
|
+
--embedding-dir /path/to/model-cache \
|
|
91
|
+
--device cuda \
|
|
92
|
+
--max-loaded-models 1 \
|
|
93
|
+
--max-inputs-per-request 128 \
|
|
94
|
+
--embedding-batch-size 32 \
|
|
95
|
+
--model-kwargs '{"local_files_only": true, "trust_remote_code": true}' \
|
|
96
|
+
--encode-kwargs '{"normalize_embeddings": true}'
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Start the API:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
remote-embedding-server
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Or:
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
python -m remote_embedding
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Defaults:
|
|
112
|
+
|
|
113
|
+
- `HOST=0.0.0.0`
|
|
114
|
+
- `PORT=5055`
|
|
115
|
+
|
|
116
|
+
CLI flags override environment variables for the current process.
|
|
117
|
+
|
|
118
|
+
## Configuration
|
|
119
|
+
|
|
120
|
+
Server configuration:
|
|
121
|
+
|
|
122
|
+
- `HOST`: bind address for the FastAPI server
|
|
123
|
+
- `PORT`: bind port for the FastAPI server
|
|
124
|
+
- `EMBEDDING_MODEL_NAME`: default model to preload and use when a request does not pass `model_name`
|
|
125
|
+
- `EMBEDDING_DIR`: optional local cache/model directory for Hugging Face downloads or local files
|
|
126
|
+
- `DEVICE`: device passed to `HuggingFaceEmbeddings`, such as `cpu` or `cuda`
|
|
127
|
+
- `MAX_LOADED_MODELS`: maximum number of embedding model instances kept in memory, default `1`
|
|
128
|
+
- `MAX_INPUTS_PER_REQUEST`: maximum number of strings accepted in one `/embed` request, default `128`
|
|
129
|
+
- `EMBEDDING_BATCH_SIZE`: default encoder `batch_size`, default `32`
|
|
130
|
+
- `MODEL_KWARGS`: JSON object merged into `HuggingFaceEmbeddings(..., model_kwargs=...)`
|
|
131
|
+
- `ENCODE_KWARGS`: JSON object passed to `HuggingFaceEmbeddings(..., encode_kwargs=...)`
|
|
132
|
+
|
|
133
|
+
Client configuration through `RemoteEmbeddings(...)`:
|
|
134
|
+
|
|
135
|
+
- `base_url`: full server URL, such as `http://127.0.0.1:5055`
|
|
136
|
+
- `timeout`: request timeout in seconds
|
|
137
|
+
- `expected_dimensions`: optional validation for returned vector size
|
|
138
|
+
- `model_name`: optional per-client default model name sent with each request
|
|
139
|
+
- `embedding_dir`: optional per-client cache/model directory override sent with each request
|
|
140
|
+
- `model_kwargs`: optional JSON-serializable dict sent to the server and merged into `model_kwargs`
|
|
141
|
+
- `encode_kwargs`: optional JSON-serializable dict sent to the server as `encode_kwargs`
|
|
142
|
+
|
|
143
|
+
If `EMBEDDING_MODEL_NAME` is configured on the server, the server can preload one shared embedding model instance and let multiple applications reuse it. That is what saves VRAM versus loading the same model separately in each application process.
|
|
144
|
+
|
|
145
|
+
`model_kwargs` and `encode_kwargs` become part of the server-side model cache key. Different combinations can create different embedding instances. The server evicts older instances once `MAX_LOADED_MODELS` is exceeded, and defaults to keeping one model loaded to protect GPU memory.
|
|
146
|
+
|
|
147
|
+
## Use The Client
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
from remote_embedding import RemoteEmbeddings
|
|
151
|
+
|
|
152
|
+
embeddings = RemoteEmbeddings(
|
|
153
|
+
base_url="http://127.0.0.1:5055",
|
|
154
|
+
timeout=120,
|
|
155
|
+
expected_dimensions=768,
|
|
156
|
+
model_name="BAAI/bge-base-en-v1.5",
|
|
157
|
+
embedding_dir="C:/models/cache",
|
|
158
|
+
model_kwargs={"local_files_only": True, "trust_remote_code": True},
|
|
159
|
+
encode_kwargs={"normalize_embeddings": True},
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
docs = embeddings.embed_documents(["hello world", "remote embeddings"])
|
|
163
|
+
query = embeddings.embed_query("search text")
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
## RAG Pipeline Usage
|
|
167
|
+
|
|
168
|
+
If your RAG pipeline currently loads a local embedding model inside each application process, you can replace that with `RemoteEmbeddings` and route embedding calls to one shared server.
|
|
169
|
+
|
|
170
|
+
Before:
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
from langchain_huggingface import HuggingFaceEmbeddings
|
|
174
|
+
|
|
175
|
+
embed_model = HuggingFaceEmbeddings(
|
|
176
|
+
model_name="Qwen/Qwen3-Embedding-0.6B",
|
|
177
|
+
model_kwargs={"device": "cuda", "local_files_only": True},
|
|
178
|
+
cache_folder=EMBEDDING_DIR,
|
|
179
|
+
)
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
After:
|
|
183
|
+
|
|
184
|
+
```python
|
|
185
|
+
from remote_embedding import RemoteEmbeddings
|
|
186
|
+
|
|
187
|
+
embed_model = RemoteEmbeddings(
|
|
188
|
+
base_url="http://127.0.0.1:5055",
|
|
189
|
+
model_name="Qwen/Qwen3-Embedding-0.6B",
|
|
190
|
+
embedding_dir="C:/models/cache",
|
|
191
|
+
encode_kwargs={"normalize_embeddings": True},
|
|
192
|
+
)
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
This makes it easier for multiple RAG applications, workers, or services to share the same loaded embedding model instead of each loading its own copy into GPU memory.
|
|
196
|
+
|
|
197
|
+
## Build For PyPI
|
|
198
|
+
|
|
199
|
+
Build distributions locally:
|
|
200
|
+
|
|
201
|
+
```bash
|
|
202
|
+
python -m pip install --upgrade build
|
|
203
|
+
python -m build
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
This creates:
|
|
207
|
+
|
|
208
|
+
- `dist/*.tar.gz`
|
|
209
|
+
- `dist/*.whl`
|
|
210
|
+
|
|
211
|
+
Upload with Twine:
|
|
212
|
+
|
|
213
|
+
```bash
|
|
214
|
+
python -m pip install --upgrade twine
|
|
215
|
+
python -m twine upload dist/*
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
## Contributing
|
|
219
|
+
|
|
220
|
+
Contributions are welcome through issues and pull requests.
|
|
221
|
+
|
|
222
|
+
Typical local workflow:
|
|
223
|
+
|
|
224
|
+
```bash
|
|
225
|
+
git clone git@github.com:MeshkatShB/remote-embedding.git
|
|
226
|
+
cd remote-embedding
|
|
227
|
+
python -m pip install --upgrade build
|
|
228
|
+
python -m build
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
If you change packaging metadata, rebuild `dist/` before opening a release-oriented pull request.
|
|
232
|
+
|
|
233
|
+
## License
|
|
234
|
+
|
|
235
|
+
This project is licensed under the MIT License. See `LICENSE` for the full text.
|
|
236
|
+
|
|
237
|
+
## Citation
|
|
238
|
+
|
|
239
|
+
If you use this project in research, infrastructure, or published work, cite the repository:
|
|
240
|
+
|
|
241
|
+
```bibtex
|
|
242
|
+
@software{bagheri_remote_embedding_2026,
|
|
243
|
+
author = {Bagheri, Meshkat Shariat},
|
|
244
|
+
title = {remote-embedding},
|
|
245
|
+
year = {2026},
|
|
246
|
+
url = {https://github.com/MeshkatShB/remote-embedding}
|
|
247
|
+
}
|
|
248
|
+
```
|
|
@@ -31,6 +31,9 @@ PowerShell:
|
|
|
31
31
|
$env:EMBEDDING_MODEL_NAME="BAAI/bge-base-en-v1.5"
|
|
32
32
|
$env:EMBEDDING_DIR="C:\\path\\to\\model-cache"
|
|
33
33
|
$env:DEVICE="cpu"
|
|
34
|
+
$env:MAX_LOADED_MODELS="1"
|
|
35
|
+
$env:MAX_INPUTS_PER_REQUEST="128"
|
|
36
|
+
$env:EMBEDDING_BATCH_SIZE="32"
|
|
34
37
|
$env:MODEL_KWARGS='{"local_files_only": true, "trust_remote_code": true}'
|
|
35
38
|
$env:ENCODE_KWARGS='{"normalize_embeddings": true}'
|
|
36
39
|
```
|
|
@@ -41,6 +44,9 @@ Bash:
|
|
|
41
44
|
export EMBEDDING_MODEL_NAME=BAAI/bge-base-en-v1.5
|
|
42
45
|
export EMBEDDING_DIR=/path/to/model-cache
|
|
43
46
|
export DEVICE=cpu
|
|
47
|
+
export MAX_LOADED_MODELS=1
|
|
48
|
+
export MAX_INPUTS_PER_REQUEST=128
|
|
49
|
+
export EMBEDDING_BATCH_SIZE=32
|
|
44
50
|
export MODEL_KWARGS='{"local_files_only": true, "trust_remote_code": true}'
|
|
45
51
|
export ENCODE_KWARGS='{"normalize_embeddings": true}'
|
|
46
52
|
```
|
|
@@ -54,6 +60,9 @@ remote-embedding-server \
|
|
|
54
60
|
--model-name BAAI/bge-base-en-v1.5 \
|
|
55
61
|
--embedding-dir /path/to/model-cache \
|
|
56
62
|
--device cuda \
|
|
63
|
+
--max-loaded-models 1 \
|
|
64
|
+
--max-inputs-per-request 128 \
|
|
65
|
+
--embedding-batch-size 32 \
|
|
57
66
|
--model-kwargs '{"local_files_only": true, "trust_remote_code": true}' \
|
|
58
67
|
--encode-kwargs '{"normalize_embeddings": true}'
|
|
59
68
|
```
|
|
@@ -86,6 +95,9 @@ Server configuration:
|
|
|
86
95
|
- `EMBEDDING_MODEL_NAME`: default model to preload and use when a request does not pass `model_name`
|
|
87
96
|
- `EMBEDDING_DIR`: optional local cache/model directory for Hugging Face downloads or local files
|
|
88
97
|
- `DEVICE`: device passed to `HuggingFaceEmbeddings`, such as `cpu` or `cuda`
|
|
98
|
+
- `MAX_LOADED_MODELS`: maximum number of embedding model instances kept in memory, default `1`
|
|
99
|
+
- `MAX_INPUTS_PER_REQUEST`: maximum number of strings accepted in one `/embed` request, default `128`
|
|
100
|
+
- `EMBEDDING_BATCH_SIZE`: default encoder `batch_size`, default `32`
|
|
89
101
|
- `MODEL_KWARGS`: JSON object merged into `HuggingFaceEmbeddings(..., model_kwargs=...)`
|
|
90
102
|
- `ENCODE_KWARGS`: JSON object passed to `HuggingFaceEmbeddings(..., encode_kwargs=...)`
|
|
91
103
|
|
|
@@ -101,7 +113,7 @@ Client configuration through `RemoteEmbeddings(...)`:
|
|
|
101
113
|
|
|
102
114
|
If `EMBEDDING_MODEL_NAME` is configured on the server, the server can preload one shared embedding model instance and let multiple applications reuse it. That is what saves VRAM versus loading the same model separately in each application process.
|
|
103
115
|
|
|
104
|
-
`model_kwargs` and `encode_kwargs` become part of the server-side model cache key.
|
|
116
|
+
`model_kwargs` and `encode_kwargs` become part of the server-side model cache key. Different combinations can create different embedding instances. The server evicts older instances once `MAX_LOADED_MODELS` is exceeded, and defaults to keeping one model loaded to protect GPU memory.
|
|
105
117
|
|
|
106
118
|
## Use The Client
|
|
107
119
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "remote-embedding"
|
|
7
|
-
version = "0.2.0"
|
|
7
|
+
version = "0.3.0"
|
|
8
8
|
description = "A shared FastAPI embedding service and LangChain-compatible remote client for reusing one embedding model across multiple applications and lowering VRAM usage on limited GPUs."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
[egg_info]
|
|
2
|
-
tag_build =
|
|
3
|
-
tag_date = 0
|
|
4
|
-
|
|
1
|
+
[egg_info]
|
|
2
|
+
tag_build =
|
|
3
|
+
tag_date = 0
|
|
4
|
+
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Public package exports for remote-embedding."""
|
|
2
|
+
|
|
3
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
4
|
+
|
|
5
|
+
from .remote import RemoteEmbeddings
|
|
6
|
+
|
|
7
|
+
__all__ = ["RemoteEmbeddings"]
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
__version__ = version("remote-embedding")
|
|
11
|
+
except PackageNotFoundError:
|
|
12
|
+
__version__ = "0.0.0"
|