fastembed-cloud 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fastembed_cloud/__init__.py +24 -0
- fastembed_cloud/embedding.py +300 -0
- fastembed_cloud/provision.py +175 -0
- fastembed_cloud-0.1.0.dist-info/METADATA +236 -0
- fastembed_cloud-0.1.0.dist-info/RECORD +8 -0
- fastembed_cloud-0.1.0.dist-info/WHEEL +5 -0
- fastembed_cloud-0.1.0.dist-info/licenses/LICENSE +21 -0
- fastembed_cloud-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""
|
|
2
|
+
fastembed-cloud — Cloud embeddings via AINative API.
|
|
3
|
+
|
|
4
|
+
Drop-in replacement for fastembed that generates embeddings via API
|
|
5
|
+
instead of downloading and running ONNX models locally.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
from fastembed_cloud import CloudTextEmbedding
|
|
9
|
+
|
|
10
|
+
model = CloudTextEmbedding()
|
|
11
|
+
embeddings = model.embed(["hello world", "semantic search"])
|
|
12
|
+
|
|
13
|
+
Or use the smart TextEmbedding that prefers local fastembed when installed:
|
|
14
|
+
|
|
15
|
+
from fastembed_cloud import TextEmbedding
|
|
16
|
+
|
|
17
|
+
model = TextEmbedding()
|
|
18
|
+
embeddings = model.embed(["hello world"])
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from fastembed_cloud.embedding import CloudTextEmbedding, TextEmbedding
|
|
22
|
+
|
|
23
|
+
__version__ = "0.1.0"
|
|
24
|
+
__all__ = ["CloudTextEmbedding", "TextEmbedding"]
|
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
"""
|
|
2
|
+
fastembed-cloud — Embedding Classes
|
|
3
|
+
|
|
4
|
+
CloudTextEmbedding: Always uses the AINative API (no local models).
|
|
5
|
+
TextEmbedding: Smart hybrid — uses local fastembed if installed, cloud otherwise.
|
|
6
|
+
|
|
7
|
+
API endpoint: POST /api/v1/public/embeddings/generate
|
|
8
|
+
Request body: {"texts": [...], "model": "bge-m3", "normalize": true}
|
|
9
|
+
Response: {"embeddings": [[...], ...], "model": ..., "dimensions": ..., "count": ...}
|
|
10
|
+
|
|
11
|
+
Refs #3943
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
from typing import Iterator, List, Optional, Union
|
|
16
|
+
|
|
17
|
+
import requests
|
|
18
|
+
|
|
19
|
+
from fastembed_cloud.provision import resolve_api_key
|
|
20
|
+
|
|
21
|
+
# Every accepted model identifier (full fastembed-style names and short
# aliases) paired with the model name that is sent to the AINative API.
MODEL_MAP = dict(
    [
        ("BAAI/bge-small-en-v1.5", "BAAI/bge-small-en-v1.5"),
        ("BAAI/bge-base-en-v1.5", "BAAI/bge-base-en-v1.5"),
        ("BAAI/bge-large-en-v1.5", "BAAI/bge-large-en-v1.5"),
        ("BAAI/bge-m3", "bge-m3"),
        ("bge-small", "BAAI/bge-small-en-v1.5"),
        ("bge-base", "BAAI/bge-base-en-v1.5"),
        ("bge-large", "BAAI/bge-large-en-v1.5"),
        ("bge-m3", "bge-m3"),
    ]
)

# Vector width per *resolved* model name (i.e. the values of MODEL_MAP).
MODEL_DIMENSIONS = dict(
    [
        ("BAAI/bge-small-en-v1.5", 384),
        ("BAAI/bge-base-en-v1.5", 768),
        ("BAAI/bge-large-en-v1.5", 1024),
        ("bge-m3", 1024),
    ]
)

# Model used when the caller does not name one.
DEFAULT_MODEL = "BAAI/bge-small-en-v1.5"
# API host used when neither an argument nor an environment variable sets one.
DEFAULT_BASE_URL = "https://api.ainative.studio"
# Path of the embedding-generation endpoint, appended to the base URL.
GENERATE_PATH = "/api/v1/public/embeddings/generate"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _chunked(iterable, size):
|
|
47
|
+
"""Split an iterable into chunks of the given size."""
|
|
48
|
+
batch = []
|
|
49
|
+
for item in iterable:
|
|
50
|
+
batch.append(item)
|
|
51
|
+
if len(batch) >= size:
|
|
52
|
+
yield batch
|
|
53
|
+
batch = []
|
|
54
|
+
if batch:
|
|
55
|
+
yield batch
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class CloudTextEmbedding:
    """
    Generate text embeddings via AINative's free Embeddings API.

    Drop-in replacement for fastembed.TextEmbedding — same interface,
    no local model downloads, no ONNX runtime needed.

    Args:
        model_name: Model to use. Defaults to BAAI/bge-small-en-v1.5 (384d).
            Supported: bge-small (384d), bge-base (768d), bge-large (1024d), bge-m3 (1024d).
        api_key: AINative API key. Auto-resolved from env/config/provisioning if not set.
        base_url: API base URL. Defaults to https://api.ainative.studio.
        batch_size: Max texts per API call. Defaults to 64.
        normalize: Normalize embeddings to unit length. Defaults to True.

    Example:
        >>> from fastembed_cloud import CloudTextEmbedding
        >>> model = CloudTextEmbedding()
        >>> embeddings = list(model.embed(["hello world", "semantic search"]))
        >>> len(embeddings[0])  # 384 dimensions
        384
    """

    def __init__(
        self,
        model_name: str = DEFAULT_MODEL,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        batch_size: int = 64,
        normalize: bool = True,
        **kwargs,  # Accept extra kwargs for fastembed compat
    ):
        # Unknown names are passed through unchanged so the API can decide.
        self.model_name = MODEL_MAP.get(model_name, model_name)
        self._api_key = resolve_api_key(api_key)
        resolved_url = (
            base_url
            or os.environ.get("AINATIVE_API_URL")
            or os.environ.get("ZERODB_API_URL")
            or DEFAULT_BASE_URL
        )
        # Drop trailing slashes so joining with GENERATE_PATH never yields "//".
        self._base_url = resolved_url.rstrip("/")
        self._batch_size = batch_size
        self._normalize = normalize
        self._session = requests.Session()
        self._session.headers.update({
            "x-api-key": self._api_key,
            "Content-Type": "application/json",
        })

    @property
    def dim(self) -> int:
        """Return the embedding dimension for the current model.

        Models missing from MODEL_DIMENSIONS report 384.
        """
        return MODEL_DIMENSIONS.get(self.model_name, 384)

    def embed(
        self,
        documents: Union[str, List[str], Iterator[str]],
        batch_size: Optional[int] = None,
        **kwargs,
    ) -> List[List[float]]:
        """
        Embed one or more documents.

        Args:
            documents: Text strings to embed. A single ``str`` is treated as
                one document rather than as an iterable of characters.
            batch_size: Override default batch size (capped at 100 per call).

        Returns:
            List of embedding vectors (each a list of floats), in input order.

        Raises:
            RuntimeError: If the API returns an error.
        """
        # Iterating a bare string would embed every character separately.
        docs = [documents] if isinstance(documents, str) else list(documents)
        if not docs:
            return []

        requested = batch_size or self._batch_size
        # At most 100 texts per request, and never fewer than one.
        chunk_size = max(1, min(requested, 100))

        results = []
        for batch in _chunked(docs, chunk_size):
            results.extend(self._call_api(batch))
        return results

    def query_embed(self, query: str, **kwargs) -> List[float]:
        """
        Embed a single query string.

        Args:
            query: Text to embed.

        Returns:
            Embedding vector as a list of floats.

        Raises:
            RuntimeError: If the API fails or returns no embedding.
        """
        result = self.embed([query])
        if not result:
            raise RuntimeError("Empty response from embedding API")
        return result[0]

    def passage_embed(
        self, texts: Union[str, List[str], Iterator[str]], **kwargs
    ) -> List[List[float]]:
        """
        Embed passages (alias for embed, provided for fastembed compatibility).

        Args:
            texts: Passage strings.

        Returns:
            List of embedding vectors.
        """
        return self.embed(texts, **kwargs)

    def _call_api(self, texts: List[str]) -> List[List[float]]:
        """
        Call the AINative embeddings API for one batch.

        Args:
            texts: Batch of texts to embed.

        Returns:
            List of embedding vectors, one per input text.

        Raises:
            RuntimeError: On transport failure, a non-200 status, a body that
                is not a JSON object with an "embeddings" list, or a result
                count that does not match the batch.
        """
        url = f"{self._base_url}{GENERATE_PATH}"
        payload = {
            "texts": texts,
            "model": self.model_name,
            "normalize": self._normalize,
        }

        try:
            resp = self._session.post(url, json=payload, timeout=30)
        except requests.RequestException as exc:
            raise RuntimeError(f"Embedding API request failed: {exc}") from exc

        if resp.status_code == 401:
            raise RuntimeError(
                "Invalid API key. Set AINATIVE_API_KEY or pass api_key= parameter."
            )
        if resp.status_code == 429:
            raise RuntimeError(
                "Rate limited. Wait a moment and try again, or upgrade at https://ainative.studio"
            )
        if resp.status_code != 200:
            detail = resp.text[:200] if resp.text else "Unknown error"
            raise RuntimeError(
                f"Embedding API error (HTTP {resp.status_code}): {detail}"
            )

        # A 200 with a non-JSON body (e.g. a proxy error page) must surface as
        # the documented RuntimeError, not as a decoding exception.
        try:
            data = resp.json()
        except ValueError as exc:
            raise RuntimeError("Embedding API returned a non-JSON response") from exc

        if not isinstance(data, dict):
            raise RuntimeError(
                f"Unexpected API response format: {type(data).__name__}"
            )

        embeddings = data.get("embeddings")
        if embeddings is None:
            raise RuntimeError(f"Unexpected API response format: {list(data.keys())}")

        # Callers pair vectors with inputs by position, so a count mismatch
        # would silently attach vectors to the wrong documents.
        if not isinstance(embeddings, list) or len(embeddings) != len(texts):
            raise RuntimeError(
                "Embedding API returned a result that does not match the "
                f"{len(texts)} submitted texts"
            )

        return embeddings

    def __repr__(self) -> str:
        return f"CloudTextEmbedding(model={self.model_name!r}, dim={self.dim})"
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
class TextEmbedding:
    """
    Smart embedding: uses local fastembed if installed, falls back to cloud.

    This is a hybrid class that checks for the `fastembed` package at init time.
    If fastembed can be imported, it uses the local fastembed model.
    If not, it falls back to AINative's cloud API via CloudTextEmbedding.
    The backend is chosen once, in the constructor; there is no per-call
    fallback afterwards.

    Args:
        model_name: Model to use. Defaults to BAAI/bge-small-en-v1.5.
        api_key: AINative API key (only used if falling back to cloud).
        **kwargs: Additional kwargs passed to the underlying embedding class.

    Example:
        >>> from fastembed_cloud import TextEmbedding
        >>> model = TextEmbedding()  # Uses local if fastembed installed, cloud otherwise
        >>> embeddings = list(model.embed(["hello world"]))
    """

    def __init__(
        self,
        model_name: str = DEFAULT_MODEL,
        api_key: Optional[str] = None,
        **kwargs,
    ):
        # Exactly one of these is set once construction succeeds.
        self._local = None
        self._cloud = None

        try:
            from fastembed import TextEmbedding as LocalTextEmbedding
            self._local = LocalTextEmbedding(model_name=model_name, **kwargs)
        except ImportError:
            self._cloud = CloudTextEmbedding(
                model_name=model_name, api_key=api_key, **kwargs
            )

    @property
    def is_cloud(self) -> bool:
        """True if using cloud API, False if using local fastembed."""
        return self._cloud is not None

    @property
    def dim(self) -> int:
        """Return the embedding dimension for the current model.

        Models missing from MODEL_DIMENSIONS report 384.
        """
        if self._cloud is not None:
            return self._cloud.dim
        # Local fastembed doesn't always expose dim, use our map
        model_name = getattr(self._local, "model_name", DEFAULT_MODEL)
        # MODEL_DIMENSIONS is keyed by resolved names, so map aliases first.
        resolved = MODEL_MAP.get(model_name, model_name)
        return MODEL_DIMENSIONS.get(resolved, 384)

    def embed(
        self, documents: Union[str, List[str], Iterator[str]], **kwargs
    ) -> List[List[float]]:
        """
        Embed documents using the backend selected at construction.

        Args:
            documents: Texts to embed. A single ``str`` is treated as one
                document rather than as an iterable of characters.

        Returns:
            List of embedding vectors.
        """
        # Normalise here so both backends see the same input shape.
        if isinstance(documents, str):
            documents = [documents]
        if self._cloud is not None:
            return self._cloud.embed(documents, **kwargs)
        # Local fastembed returns a generator of numpy arrays
        return [emb.tolist() for emb in self._local.embed(list(documents), **kwargs)]

    def query_embed(self, query: str, **kwargs) -> List[float]:
        """
        Embed a single query.

        Args:
            query: Text to embed.

        Returns:
            Embedding vector as a list of floats.

        Raises:
            RuntimeError: If the backend produces no embedding.
        """
        if self._cloud is not None:
            return self._cloud.query_embed(query, **kwargs)
        # Forward kwargs, matching what embed() does for the local backend.
        result = list(self._local.query_embed(query, **kwargs))
        if not result:
            raise RuntimeError("Empty response from local embedding model")
        first = result[0]
        return first.tolist() if hasattr(first, "tolist") else first

    def passage_embed(
        self, texts: Union[str, List[str], Iterator[str]], **kwargs
    ) -> List[List[float]]:
        """Embed passages (alias for embed)."""
        return self.embed(texts, **kwargs)

    def __repr__(self) -> str:
        backend = "local" if self._local is not None else "cloud"
        return f"TextEmbedding(backend={backend!r})"
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
"""
|
|
2
|
+
fastembed-cloud — Auto-Provisioning
|
|
3
|
+
|
|
4
|
+
Zero-friction cloud provisioning for embedding users.
|
|
5
|
+
On first use, if no API key is found, automatically provisions
|
|
6
|
+
a free AINative account (72-hour TTL) that can be claimed later.
|
|
7
|
+
|
|
8
|
+
Credential resolution order:
|
|
9
|
+
1. Explicit api_key parameter
|
|
10
|
+
2. AINATIVE_API_KEY environment variable
|
|
11
|
+
3. ZERODB_API_KEY environment variable (shared with zerodb ecosystem)
|
|
12
|
+
4. ~/.zerodb/credentials.json, then ~/.zerodb/config.json (shared credential store)
|
|
13
|
+
5. Auto-provision via /api/v1/public/instant-db
|
|
14
|
+
|
|
15
|
+
Refs #3943
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import json
|
|
19
|
+
import os
|
|
20
|
+
import sys
|
|
21
|
+
from datetime import datetime
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import Optional
|
|
24
|
+
|
|
25
|
+
# Per-user directory holding the credential and config files read below.
ZERODB_DIR = Path.home().joinpath(".zerodb")
# Primary credential file; also the file auto-provisioning writes to.
CREDS_PATH = ZERODB_DIR.joinpath("credentials.json")
# Secondary file consulted for an API key when the primary has none.
CONFIG_PATH = ZERODB_DIR.joinpath("config.json")
# Host and path of the account-provisioning endpoint.
CLOUD_API_URL = "https://api.ainative.studio"
PROVISION_ENDPOINT = "/api/v1/public/instant-db"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def resolve_api_key(explicit_key: Optional[str] = None) -> str:
    """
    Find an API key, trying each source in priority order.

    Order: the explicit argument, the AINATIVE_API_KEY then ZERODB_API_KEY
    environment variables, the stored credential files, and finally
    auto-provisioning of a new account.

    Args:
        explicit_key: Key passed directly by the user.

    Returns:
        The first non-empty key found.

    Raises:
        RuntimeError: If auto-provisioning fails and no key is available.
    """
    # A key supplied by the caller always wins.
    if explicit_key:
        return explicit_key

    # Environment variables, in priority order; empty values are skipped.
    for variable in ("AINATIVE_API_KEY", "ZERODB_API_KEY"):
        from_env = os.environ.get(variable)
        if from_env:
            return from_env

    # Stored credentials (shared with zerodb ecosystem); otherwise provision.
    stored = _load_credentials()
    return stored if stored else _auto_provision()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _load_credentials() -> Optional[str]:
    """Load an API key from the shared files under ~/.zerodb/.

    CREDS_PATH is checked for "api_key" first, then CONFIG_PATH for "api_key"
    or "cloud_api_key". A file that is missing, unreadable, not valid UTF-8,
    not valid JSON, or not a JSON object is skipped, so a damaged file never
    prevents the remaining sources from being tried.

    Returns:
        The first non-empty string key found, or None.
    """
    # NOTE(review): the "expires_at" written at provisioning time is not
    # checked here, so an expired provisioned key would still be returned —
    # confirm whether expiry should be handled on load.
    sources = (
        (CREDS_PATH, ("api_key",)),
        (CONFIG_PATH, ("api_key", "cloud_api_key")),
    )
    for path, fields in sources:
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # OSError: missing or unreadable file. ValueError covers both
            # JSONDecodeError and UnicodeDecodeError.
            continue
        if not isinstance(data, dict):
            continue
        for field in fields:
            key = data.get(field)
            if isinstance(key, str) and key:
                return key

    return None
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _auto_provision() -> str:
    """
    Auto-provision a free AINative account for embedding access.

    POSTs to the provisioning endpoint, stores the returned credentials via
    _save_credentials, prints a summary to stderr and returns the new key.
    A failure to write the credentials file is reported on stderr but does
    not discard the freshly issued key.

    Returns:
        API key string.

    Raises:
        RuntimeError: If provisioning fails.
    """
    # NOTE(review): the request body sends "agree_terms": True without asking
    # the user — confirm this implicit consent is intended.
    print(
        "\n No API key found — provisioning a free AINative account for embeddings...",
        file=sys.stderr,
    )

    import requests

    # Only the network call sits inside the try, so the "Network error"
    # message is reserved for genuine transport failures.
    try:
        resp = requests.post(
            f"{CLOUD_API_URL}{PROVISION_ENDPOINT}",
            json={"agree_terms": True, "source": "fastembed-cloud"},
            timeout=15,
        )
    except requests.RequestException as exc:
        raise RuntimeError(
            f"Network error during provisioning: {exc}. "
            "Set AINATIVE_API_KEY manually or sign up at https://ainative.studio/signup"
        ) from exc

    if resp.status_code == 429:
        raise RuntimeError(
            "Rate limited — too many provisions from this IP. "
            "Sign up at https://ainative.studio/signup and set AINATIVE_API_KEY."
        )

    if resp.status_code not in (200, 201):
        raise RuntimeError(
            f"Provisioning failed (HTTP {resp.status_code}). "
            "Sign up at https://ainative.studio/signup and set AINATIVE_API_KEY."
        )

    try:
        data = resp.json()
    except ValueError as exc:
        raise RuntimeError(
            "Provisioning returned a non-JSON response. "
            "Sign up at https://ainative.studio/signup and set AINATIVE_API_KEY."
        ) from exc

    # Anything other than a JSON object cannot carry a key.
    api_key = data.get("api_key", "") if isinstance(data, dict) else ""
    if not api_key:
        raise RuntimeError("Provisioning returned empty API key.")

    # Save credentials for reuse. The key is already valid at this point, so
    # an unwritable home directory must not turn success into a crash.
    save_error = None
    try:
        _save_credentials(data)
    except OSError as exc:
        save_error = exc

    _print_success(data)
    if save_error is not None:
        print(
            f" Warning: could not save credentials ({save_error}). "
            "Set AINATIVE_API_KEY to reuse this key.",
            file=sys.stderr,
        )
    return api_key
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _save_credentials(data: dict) -> None:
    """Save provisioned credentials to ~/.zerodb/ for ecosystem sharing.

    The file contains an API key, so the directory is created with mode 0o700
    and the file with mode 0o600 (owner-only) where the platform honours
    permission bits.

    Args:
        data: Provisioning response; missing fields are stored as "".

    Raises:
        OSError: If the directory or file cannot be created or written.
    """
    from datetime import timezone

    ZERODB_DIR.mkdir(mode=0o700, parents=True, exist_ok=True)

    creds = {
        "api_key": data.get("api_key", ""),
        "project_id": data.get("project_id", ""),
        "base_url": data.get("base_url", CLOUD_API_URL),
        "expires_at": data.get("expires_at", ""),
        "claim_url": data.get("claim_url", ""),
        # Same naive-UTC ISO string that the deprecated datetime.utcnow()
        # produced, so the stored format is unchanged.
        "provisioned_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
        "source": "fastembed-cloud",
    }
    payload = json.dumps(creds, indent=2) + "\n"

    # Create the file owner-only from the start, so the key is never briefly
    # readable by other users.
    fd = os.open(str(CREDS_PATH), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w", encoding="utf-8") as handle:
        handle.write(payload)

    # os.open applies the mode only when it creates the file; tighten a file
    # that already existed. Best effort: some filesystems cannot chmod.
    try:
        os.chmod(str(CREDS_PATH), 0o600)
    except OSError:
        pass
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _print_success(data: dict) -> None:
    """Write the post-provisioning summary to stderr.

    Shows the first 12 characters of the API key, the expiry, where the
    credentials are stored and the URL for claiming the account.
    """
    expiry = data.get("expires_at", "72 hours")
    claim = data.get("claim_url", "https://ainative.studio/signup")
    key_preview = data.get("api_key", "")[:12]

    # One entry per output line; empty entries are the blank separator lines.
    report = [
        "",
        " Auto-provisioned! Free embeddings API ready.",
        "",
        f" API Key: {key_preview}...",
        f" Expires: {expiry}",
        " Saved to: ~/.zerodb/credentials.json",
        "",
        " To keep access permanently, claim your account:",
        f" {claim}",
        "",
    ]
    print("\n".join(report), file=sys.stderr)
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fastembed-cloud
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Cloud embeddings via AINative API — drop-in fastembed replacement, no model downloads
|
|
5
|
+
Author-email: AINative Studio <support@ainative.studio>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/AINative-Studio/fastembed-cloud
|
|
8
|
+
Project-URL: Documentation, https://docs.ainative.studio/embeddings
|
|
9
|
+
Project-URL: Repository, https://github.com/AINative-Studio/fastembed-cloud
|
|
10
|
+
Project-URL: Issues, https://github.com/AINative-Studio/fastembed-cloud/issues
|
|
11
|
+
Keywords: fastembed,fastembed-alternative,embeddings,text-embeddings,cloud-embeddings,free-embeddings,ainative,zerodb,bge,baai,onnx-alternative,vector-embeddings,semantic-search,similarity-search,sentence-transformers-alternative,auto-provisioning,serverless,no-download,claude,cursor,windsurf,rag,retrieval-augmented-generation
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
24
|
+
Requires-Python: >=3.8
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Dist: requests>=2.28
|
|
28
|
+
Provides-Extra: local
|
|
29
|
+
Requires-Dist: fastembed>=0.3.0; extra == "local"
|
|
30
|
+
Provides-Extra: numpy
|
|
31
|
+
Requires-Dist: numpy>=1.20; extra == "numpy"
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
34
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
35
|
+
Requires-Dist: responses>=0.23; extra == "dev"
|
|
36
|
+
Dynamic: license-file
|
|
37
|
+
|
|
38
|
+
# fastembed-cloud
|
|
39
|
+
|
|
40
|
+
Cloud text embeddings via AINative API. Drop-in replacement for [fastembed](https://github.com/qdrant/fastembed) — same interface, no model downloads, no ONNX runtime.
|
|
41
|
+
|
|
42
|
+
## Why?
|
|
43
|
+
|
|
44
|
+
fastembed is great, but requires downloading 100MB+ ONNX models locally. `fastembed-cloud` gives you the same API backed by a free cloud service — zero setup, zero downloads.
|
|
45
|
+
|
|
46
|
+
| | fastembed | fastembed-cloud |
|
|
47
|
+
|---|---|---|
|
|
48
|
+
| First-run latency | 30-120s (model download) | 0s |
|
|
49
|
+
| Disk usage | 100MB-1GB per model | 0 |
|
|
50
|
+
| ONNX runtime | Required | Not needed |
|
|
51
|
+
| Offline support | Yes | No (cloud API) |
|
|
52
|
+
| Cost | Free (local compute) | Free (AINative API) |
|
|
53
|
+
|
|
54
|
+
## Install
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install fastembed-cloud
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Quick Start
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
from fastembed_cloud import CloudTextEmbedding
|
|
64
|
+
|
|
65
|
+
# Auto-provisions a free API key on first run
|
|
66
|
+
model = CloudTextEmbedding()
|
|
67
|
+
|
|
68
|
+
# Single query
|
|
69
|
+
embedding = model.query_embed("What is semantic search?")
|
|
70
|
+
print(f"Dimensions: {len(embedding)}") # 384
|
|
71
|
+
|
|
72
|
+
# Batch embedding
|
|
73
|
+
docs = ["First document", "Second document", "Third document"]
|
|
74
|
+
embeddings = model.embed(docs)
|
|
75
|
+
print(f"Embedded {len(embeddings)} documents")
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## Models
|
|
79
|
+
|
|
80
|
+
| Model | Dimensions | ID |
|
|
81
|
+
|---|---|---|
|
|
82
|
+
| BGE-small-en-v1.5 | 384 | `BAAI/bge-small-en-v1.5` (default) |
|
|
83
|
+
| BGE-base-en-v1.5 | 768 | `BAAI/bge-base-en-v1.5` |
|
|
84
|
+
| BGE-large-en-v1.5 | 1024 | `BAAI/bge-large-en-v1.5` |
|
|
85
|
+
| BGE-M3 (multilingual) | 1024 | `bge-m3` |
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
model = CloudTextEmbedding(model_name="bge-m3")
|
|
89
|
+
embedding = model.query_embed("Multilingual embedding")
|
|
90
|
+
print(len(embedding)) # 1024
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Smart Hybrid: Local + Cloud
|
|
94
|
+
|
|
95
|
+
`TextEmbedding` automatically uses local fastembed if installed, cloud otherwise:
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
from fastembed_cloud import TextEmbedding
|
|
99
|
+
|
|
100
|
+
model = TextEmbedding()
|
|
101
|
+
print(model.is_cloud) # True if fastembed not installed
|
|
102
|
+
|
|
103
|
+
embeddings = model.embed(["works either way"])
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
Install fastembed alongside to run embeddings locally whenever it is available (the cloud API is used only when fastembed cannot be imported):
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
pip install "fastembed-cloud[local]"
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Authentication
|
|
113
|
+
|
|
114
|
+
Credentials are resolved in this order:
|
|
115
|
+
|
|
116
|
+
1. `api_key` parameter: `CloudTextEmbedding(api_key="your-key")`
|
|
117
|
+
2. `AINATIVE_API_KEY` environment variable
|
|
118
|
+
3. `ZERODB_API_KEY` environment variable (shared with ZeroDB ecosystem)
|
|
119
|
+
4. `~/.zerodb/credentials.json`, then `~/.zerodb/config.json` (auto-saved from any ZeroDB tool)
|
|
120
|
+
5. Auto-provisioning (free 72-hour account, claim to keep permanently)
|
|
121
|
+
|
|
122
|
+
### Auto-Provisioning
|
|
123
|
+
|
|
124
|
+
On first use with no credentials, fastembed-cloud automatically provisions a free account:
|
|
125
|
+
|
|
126
|
+
```
|
|
127
|
+
$ python -c "from fastembed_cloud import CloudTextEmbedding; CloudTextEmbedding().query_embed('test')"
|
|
128
|
+
|
|
129
|
+
No API key found — provisioning a free AINative account for embeddings...
|
|
130
|
+
|
|
131
|
+
Auto-provisioned! Free embeddings API ready.
|
|
132
|
+
|
|
133
|
+
API Key: zdb_abc12345...
|
|
134
|
+
Expires: 72 hours
|
|
135
|
+
Saved to: ~/.zerodb/credentials.json
|
|
136
|
+
|
|
137
|
+
To keep access permanently, claim your account:
|
|
138
|
+
https://ainative.studio/signup
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Batch Embedding
|
|
142
|
+
|
|
143
|
+
Handles large datasets efficiently with automatic batching:
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
model = CloudTextEmbedding(batch_size=100)
|
|
147
|
+
|
|
148
|
+
# Automatically batches into chunks of 100
|
|
149
|
+
large_dataset = ["document " + str(i) for i in range(10000)]
|
|
150
|
+
embeddings = model.embed(large_dataset)
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## Use with Vector Databases
|
|
154
|
+
|
|
155
|
+
### Qdrant
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
from qdrant_client import QdrantClient
|
|
159
|
+
from fastembed_cloud import CloudTextEmbedding
|
|
160
|
+
|
|
161
|
+
client = QdrantClient(":memory:")
|
|
162
|
+
model = CloudTextEmbedding()
|
|
163
|
+
|
|
164
|
+
docs = ["AI is transforming healthcare", "Machine learning for finance"]
|
|
165
|
+
embeddings = model.embed(docs)
|
|
166
|
+
|
|
167
|
+
client.add(
|
|
168
|
+
collection_name="my_docs",
|
|
169
|
+
documents=docs,
|
|
170
|
+
embeddings=embeddings,
|
|
171
|
+
)
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
### ChromaDB
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
import chromadb
|
|
178
|
+
from fastembed_cloud import CloudTextEmbedding
|
|
179
|
+
|
|
180
|
+
client = chromadb.Client()
|
|
181
|
+
collection = client.create_collection("my_docs")
|
|
182
|
+
model = CloudTextEmbedding()
|
|
183
|
+
|
|
184
|
+
docs = ["First doc", "Second doc"]
|
|
185
|
+
embeddings = model.embed(docs)
|
|
186
|
+
|
|
187
|
+
collection.add(
|
|
188
|
+
documents=docs,
|
|
189
|
+
embeddings=embeddings,
|
|
190
|
+
ids=["id1", "id2"],
|
|
191
|
+
)
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
## API Reference
|
|
195
|
+
|
|
196
|
+
### CloudTextEmbedding
|
|
197
|
+
|
|
198
|
+
Always uses the cloud API.
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
CloudTextEmbedding(
|
|
202
|
+
model_name="BAAI/bge-small-en-v1.5", # Model to use
|
|
203
|
+
api_key=None, # API key (auto-resolved)
|
|
204
|
+
base_url=None, # API URL (default: api.ainative.studio)
|
|
205
|
+
batch_size=64, # Max texts per API call
|
|
206
|
+
normalize=True, # Normalize to unit vectors
|
|
207
|
+
)
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
**Methods:**
|
|
211
|
+
- `embed(documents, batch_size=None)` — Embed a list of texts. Returns `list[list[float]]`.
|
|
212
|
+
- `query_embed(query)` — Embed a single query. Returns `list[float]`.
|
|
213
|
+
- `passage_embed(texts)` — Alias for `embed()` (fastembed compatibility).
|
|
214
|
+
|
|
215
|
+
**Properties:**
|
|
216
|
+
- `dim` — Embedding dimensions (e.g., 384 for bge-small).
|
|
217
|
+
- `model_name` — Resolved model name.
|
|
218
|
+
|
|
219
|
+
### TextEmbedding
|
|
220
|
+
|
|
221
|
+
Smart hybrid: local fastembed if available, cloud otherwise.
|
|
222
|
+
|
|
223
|
+
```python
|
|
224
|
+
TextEmbedding(
|
|
225
|
+
model_name="BAAI/bge-small-en-v1.5",
|
|
226
|
+
api_key=None,
|
|
227
|
+
**kwargs,
|
|
228
|
+
)
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
**Properties:**
|
|
232
|
+
- `is_cloud` — `True` if using cloud API, `False` if using local fastembed.
|
|
233
|
+
|
|
234
|
+
## License
|
|
235
|
+
|
|
236
|
+
MIT
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
fastembed_cloud/__init__.py,sha256=-rzkqUNYE57RWvFVzK2RmhRNZjlT73a5ebNlesB3Kz8,691
|
|
2
|
+
fastembed_cloud/embedding.py,sha256=HrddbQcB6RK4CyfpefN3-3IuEMh-8UuNATR0V1KaJv0,9757
|
|
3
|
+
fastembed_cloud/provision.py,sha256=TDvw3mGa5c8LnjhQ1JWuaqVcBQoC3lhB1oB05ltn054,5036
|
|
4
|
+
fastembed_cloud-0.1.0.dist-info/licenses/LICENSE,sha256=-3M2h1U80S6mPyiuvRG25A0l1xZGpa7eO43lysBIzBY,1072
|
|
5
|
+
fastembed_cloud-0.1.0.dist-info/METADATA,sha256=iNLn8UxhdB8HmKADpR4G-PeOR7KmA2htXnIRm3uqqvc,6984
|
|
6
|
+
fastembed_cloud-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
7
|
+
fastembed_cloud-0.1.0.dist-info/top_level.txt,sha256=YnPhtvONgzU9-vZccnMcaCYe9m548-fj_rWnmBN9sd8,16
|
|
8
|
+
fastembed_cloud-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 AINative Studio
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
fastembed_cloud
|