hammad-python 0.0.11__py3-none-any.whl → 0.0.13__py3-none-any.whl
This diff compares two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- hammad/__init__.py +169 -56
- hammad/_core/__init__.py +1 -0
- hammad/_core/_utils/__init__.py +4 -0
- hammad/_core/_utils/_import_utils.py +182 -0
- hammad/ai/__init__.py +59 -0
- hammad/ai/_utils.py +142 -0
- hammad/ai/completions/__init__.py +44 -0
- hammad/ai/completions/client.py +729 -0
- hammad/ai/completions/create.py +686 -0
- hammad/ai/completions/types.py +711 -0
- hammad/ai/completions/utils.py +374 -0
- hammad/ai/embeddings/__init__.py +35 -0
- hammad/ai/embeddings/client/__init__.py +1 -0
- hammad/ai/embeddings/client/base_embeddings_client.py +26 -0
- hammad/ai/embeddings/client/fastembed_text_embeddings_client.py +200 -0
- hammad/ai/embeddings/client/litellm_embeddings_client.py +288 -0
- hammad/ai/embeddings/create.py +159 -0
- hammad/ai/embeddings/types.py +69 -0
- hammad/base/__init__.py +35 -0
- hammad/{based → base}/fields.py +23 -23
- hammad/{based → base}/model.py +124 -14
- hammad/base/utils.py +280 -0
- hammad/cache/__init__.py +30 -12
- hammad/cache/base_cache.py +181 -0
- hammad/cache/cache.py +169 -0
- hammad/cache/decorators.py +261 -0
- hammad/cache/file_cache.py +80 -0
- hammad/cache/ttl_cache.py +74 -0
- hammad/cli/__init__.py +10 -2
- hammad/cli/{styles/animations.py → animations.py} +79 -23
- hammad/cli/{plugins/__init__.py → plugins.py} +85 -90
- hammad/cli/styles/__init__.py +50 -0
- hammad/cli/styles/settings.py +4 -0
- hammad/configuration/__init__.py +35 -0
- hammad/{data/types/files → configuration}/configuration.py +96 -7
- hammad/data/__init__.py +14 -26
- hammad/data/collections/__init__.py +4 -2
- hammad/data/collections/collection.py +300 -75
- hammad/data/collections/vector_collection.py +118 -12
- hammad/data/databases/__init__.py +2 -2
- hammad/data/databases/database.py +383 -32
- hammad/json/__init__.py +2 -2
- hammad/logging/__init__.py +13 -5
- hammad/logging/decorators.py +404 -2
- hammad/logging/logger.py +442 -22
- hammad/multimodal/__init__.py +24 -0
- hammad/{data/types/files → multimodal}/audio.py +21 -6
- hammad/{data/types/files → multimodal}/image.py +5 -5
- hammad/multithreading/__init__.py +304 -0
- hammad/pydantic/__init__.py +2 -2
- hammad/pydantic/converters.py +1 -1
- hammad/pydantic/models/__init__.py +2 -2
- hammad/text/__init__.py +59 -14
- hammad/text/converters.py +723 -0
- hammad/text/{utils/markdown/formatting.py → markdown.py} +25 -23
- hammad/text/text.py +12 -14
- hammad/types/__init__.py +11 -0
- hammad/{data/types/files → types}/file.py +18 -18
- hammad/typing/__init__.py +138 -84
- hammad/web/__init__.py +3 -2
- hammad/web/models.py +245 -0
- hammad/web/search/client.py +75 -23
- hammad/web/utils.py +14 -5
- hammad/yaml/__init__.py +2 -2
- hammad/yaml/converters.py +1 -1
- {hammad_python-0.0.11.dist-info → hammad_python-0.0.13.dist-info}/METADATA +4 -1
- hammad_python-0.0.13.dist-info/RECORD +85 -0
- hammad/based/__init__.py +0 -52
- hammad/based/utils.py +0 -455
- hammad/cache/_cache.py +0 -746
- hammad/data/types/__init__.py +0 -33
- hammad/data/types/files/__init__.py +0 -1
- hammad/data/types/files/document.py +0 -195
- hammad/text/utils/__init__.py +0 -1
- hammad/text/utils/converters.py +0 -229
- hammad/text/utils/markdown/__init__.py +0 -1
- hammad/text/utils/markdown/converters.py +0 -506
- hammad_python-0.0.11.dist-info/RECORD +0 -65
- {hammad_python-0.0.11.dist-info → hammad_python-0.0.13.dist-info}/WHEEL +0 -0
- {hammad_python-0.0.11.dist-info → hammad_python-0.0.13.dist-info}/licenses/LICENSE +0 -0
hammad/ai/embeddings/client/litellm_embeddings_client.py
ADDED
@@ -0,0 +1,288 @@

```python
"""hammad.ai.embeddings.client.litellm_embeddings_client"""

from typing import Any, List, Literal, Optional
import sys

if sys.version_info >= (3, 12):
    from typing import TypedDict
else:
    from typing_extensions import TypedDict


from .base_embeddings_client import BaseEmbeddingsClient
from ..types import (
    Embedding,
    EmbeddingUsage,
    EmbeddingResponse,
)
from ....text.converters import convert_to_text
from ..._utils import get_litellm

__all__ = (
    "LiteLlmEmbeddingsClient",
    "LiteLlmEmbeddingModel",
    "LiteLlmEmbeddingModelSettings",
)


LiteLlmEmbeddingModel = Literal[
    # OpenAI Embedding Models
    "text-embedding-3-small",
    "text-embedding-3-large",
    "text-embedding-ada-002",
    # OpenAI Compatible Embedding Models
    "openai/text-embedding-3-small",
    "openai/text-embedding-3-large",
    "openai/text-embedding-ada-002",
    # Bedrock Embedding Models
    "amazon.titan-embed-text-v1",
    "cohere.embed-english-v3",
    "cohere.embed-multilingual-v3",
    # Cohere Embedding Models
    "embed-english-v3.0",
    "embed-english-light-v3.0",
    "embed-multilingual-v3.0",
    "embed-multilingual-light-v3.0",
    "embed-english-v2.0",
    "embed-english-light-v2.0",
    "embed-multilingual-v2.0",
    # NVIDIA NIM Embedding Models
    "nvidia_nim/NV-Embed-QA",
    "nvidia_nim/nvidia/nv-embed-v1",
    "nvidia_nim/nvidia/nv-embedqa-mistral-7b-v2",
    "nvidia_nim/nvidia/nv-embedqa-e5-v5",
    "nvidia_nim/nvidia/embed-qa-4",
    "nvidia_nim/nvidia/llama-3.2-nv-embedqa-1b-v1",
    "nvidia_nim/nvidia/llama-3.2-nv-embedqa-1b-v2",
    "nvidia_nim/snowflake/arctic-embed-l",
    "nvidia_nim/baai/bge-m3",
    # HuggingFace Embedding Models
    "huggingface/microsoft/codebert-base",
    "huggingface/BAAI/bge-large-zh",
    # Mistral AI Embedding Models
    "mistral/mistral-embed",
    # Gemini AI Embedding Models
    "gemini/text-embedding-004",
    # Vertex AI Embedding Models
    "vertex_ai/textembedding-gecko",
    "vertex_ai/textembedding-gecko-multilingual",
    "vertex_ai/textembedding-gecko-multilingual@001",
    "vertex_ai/textembedding-gecko@001",
    "vertex_ai/textembedding-gecko@003",
    "vertex_ai/text-embedding-preview-0409",
    "vertex_ai/text-multilingual-embedding-preview-0409",
    # Voyage AI Embedding Models
    "voyage/voyage-01",
    "voyage/voyage-lite-01",
    "voyage/voyage-lite-01-instruct",
    # Nebius AI Studio Embedding Models
    "nebius/BAAI/bge-en-icl",
    "nebius/BAAI/bge-multilingual-gemma2",
    "nebius/intfloat/e5-mistral-7b-instruct",
    # Ollama Embedding Models
    "ollama/granite-embedding:30m",
    "ollama/granite-embedding:278m",
    "ollama/snowflake-arctic-embed2",
    "ollama/bge-large",
    "ollama/paraphrase-multilingual",
    "ollama/bge-m3",
    "ollama/snowflake-arctic-embed",
    "ollama/mxbai-embed-large",
    "ollama/all-minilm",
    "ollama/nomic-embed-text",
]
"""Common embedding models supported by `litellm`."""


class LiteLlmEmbeddingModelSettings(TypedDict):
    """Valid settings for the `litellm` embedding models."""

    model: LiteLlmEmbeddingModel | str
    dimensions: Optional[int]
    encoding_format: Optional[str]
    timeout: Optional[int]
    api_base: Optional[str]
    api_version: Optional[str]
    api_key: Optional[str]
    api_type: Optional[str]
    caching: bool
    user: Optional[str]


class LiteLlmEmbeddingError(Exception):
    """Exception raised when an error occurs while generating embeddings
    using `litellm`."""

    def __init__(self, message: str, response: Any):
        self.message = message
        self.response = response
        super().__init__(self.message)


def _parse_litellm_response_to_embedding_response(response: Any) -> EmbeddingResponse:
    """Parse the response from `litellm` to an `EmbeddingResponse` object."""
    try:
        embedding_data: List[Embedding] = []

        for i, item in enumerate(response.data):
            embedding_data.append(
                Embedding(embedding=item["embedding"], index=i, object="embedding")
            )
        usage = EmbeddingUsage(
            prompt_tokens=response.usage.prompt_tokens,
            total_tokens=response.usage.total_tokens,
        )
        return EmbeddingResponse(
            data=embedding_data,
            model=response.model,
            object="list",
            usage=usage,
        )
    except Exception as e:
        raise LiteLlmEmbeddingError(
            f"Failed to parse litellm response to embedding response: {e}",
            response,
        )


class LiteLlmEmbeddingsClient(BaseEmbeddingsClient):
    """Embeddings provider client that utilizes the `litellm` module
    when generating embeddings."""

    @staticmethod
    async def async_embed(
        input: List[Any] | Any,
        model: LiteLlmEmbeddingModel | str,
        dimensions: Optional[int] = None,
        encoding_format: Optional[str] = None,
        timeout=600,
        api_base: Optional[str] = None,
        api_version: Optional[str] = None,
        api_key: Optional[str] = None,
        api_type: Optional[str] = None,
        caching: bool = False,
        user: Optional[str] = None,
        format: bool = False,
    ) -> Embedding:
        """Asynchronously generate embeddings for the given input using
        a valid `litellm` model.

        Args:
            input (List[Any] | Any) : The input text / content to generate embeddings for.
            model (LiteLlmEmbeddingModel | str) : The model to use for generating embeddings.
            dimensions (Optional[int]) : The number of dimensions for the embedding.
            encoding_format (Optional[str]) : The format to return the embeddings in. (e.g. "float", "base64")
            timeout (int) : The timeout for the request.
            api_base (Optional[str]) : The base URL for the API.
            api_version (Optional[str]) : The version of the API.
            api_key (Optional[str]) : The API key to use for the request.
            api_type (Optional[str]) : The API type to use for the request.
            caching (bool) : Whether to cache the request.
            user (Optional[str]) : The user to use for the request.
            format (bool) : Whether to format each non-string input as a markdown string.

        Returns:
            Embedding : The embedding generated for the given input.
        """
        if not isinstance(input, list):
            input = [input]

        if format:
            for i in input:
                try:
                    i = convert_to_text(i)
                except Exception as e:
                    raise LiteLlmEmbeddingError(
                        f"Failed to format input to text: {e}",
                        i,
                    )

        async_embedding_fn = get_litellm().aembedding

        try:
            response = await async_embedding_fn(
                model=model,
                input=input,
                dimensions=dimensions,
                encoding_format=encoding_format,
                timeout=timeout,
                api_base=api_base,
                api_version=api_version,
                api_key=api_key,
                api_type=api_type,
                caching=caching,
                user=user,
            )
        except Exception as e:
            raise e

        return _parse_litellm_response_to_embedding_response(response)

    @staticmethod
    def embed(
        input: List[Any] | Any,
        model: LiteLlmEmbeddingModel | str,
        dimensions: Optional[int] = None,
        encoding_format: Optional[str] = None,
        timeout=600,
        api_base: Optional[str] = None,
        api_version: Optional[str] = None,
        api_key: Optional[str] = None,
        api_type: Optional[str] = None,
        caching: bool = False,
        user: Optional[str] = None,
        format: bool = False,
    ) -> Embedding:
        """Generate embeddings for the given input using
        a valid `litellm` model.

        Args:
            input (List[Any] | Any) : The input text / content to generate embeddings for.
            model (LiteLlmEmbeddingModel | str) : The model to use for generating embeddings.
            dimensions (Optional[int]) : The number of dimensions for the embedding.
            encoding_format (Optional[str]) : The format to return the embeddings in. (e.g. "float", "base64")
            timeout (int) : The timeout for the request.
            api_base (Optional[str]) : The base URL for the API.
            api_version (Optional[str]) : The version of the API.
            api_key (Optional[str]) : The API key to use for the request.
            api_type (Optional[str]) : The API type to use for the request.
            caching (bool) : Whether to cache the request.
            user (Optional[str]) : The user to use for the request.
            format (bool) : Whether to format each non-string input as a markdown string.

        Returns:
            Embedding : The embedding generated for the given input.
        """
        if not isinstance(input, list):
            input = [input]

        if format:
            for i in input:
                try:
                    i = convert_to_text(i)
                except Exception as e:
                    raise LiteLlmEmbeddingError(
                        f"Failed to format input to text: {e}",
                        i,
                    )

        sync_embedding_fn = get_litellm().embedding

        try:
            response = sync_embedding_fn(
                model=model,
                input=input,
                dimensions=dimensions,
                encoding_format=encoding_format,
                timeout=timeout,
                api_base=api_base,
                api_version=api_version,
                api_key=api_key,
                api_type=api_type,
                caching=caching,
                user=user,
            )
        except Exception as e:
            raise e

        return _parse_litellm_response_to_embedding_response(response)
```
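Since `embed` and `async_embed` are static methods, usage is a single call. Below is a minimal sketch (not part of the diff); the model choice is an example, and it assumes `litellm` is installed with an OpenAI API key in the environment. Note that although both methods are annotated `-> Embedding`, they actually return the full `EmbeddingResponse` built by `_parse_litellm_response_to_embedding_response`:

```python
# Hedged usage sketch for LiteLlmEmbeddingsClient; the model name and
# environment setup are assumptions, not part of the diff above.
from hammad.ai.embeddings.client.litellm_embeddings_client import (
    LiteLlmEmbeddingsClient,
)

response = LiteLlmEmbeddingsClient.embed(
    input=["hello world", "embeddings via litellm"],
    model="text-embedding-3-small",
    dimensions=512,  # optional; supported by the text-embedding-3 family
)
for emb in response.data:
    print(emb.index, emb.dimensions)  # position and vector length per input
```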
hammad/ai/embeddings/create.py
ADDED
@@ -0,0 +1,159 @@

```python
"""hammad.ai.embeddings.create"""

from typing import Any, List, Optional

from .types import (
    EmbeddingResponse,
)
from .client.fastembed_text_embeddings_client import (
    FastEmbedTextEmbeddingsClient,
    FastEmbedTextEmbeddingModel,
    FastEmbedTextEmbeddingModelSettings,
)
from .client.litellm_embeddings_client import (
    LiteLlmEmbeddingsClient,
    LiteLlmEmbeddingModel,
)


__all__ = ("async_create_embeddings", "create_embeddings")


async def async_create_embeddings(
    input: List[Any] | Any,
    model: FastEmbedTextEmbeddingModel | LiteLlmEmbeddingModel | str,
    format: bool = False,
    # LiteLLM Settings
    dimensions: Optional[int] = None,
    encoding_format: Optional[str] = None,
    timeout: Optional[int] = None,
    api_base: Optional[str] = None,
    api_version: Optional[str] = None,
    api_key: Optional[str] = None,
    api_type: Optional[str] = None,
    caching: bool = False,
    user: Optional[str] = None,
    # FastEmbed Settings
    parallel: Optional[int] = None,
    batch_size: Optional[int] = None,
    **kwargs: Any,
) -> EmbeddingResponse:
    """Asynchronously create embeddings for the given input using the specified model.

    Args:
        input (List[Any] | Any) : The input text / content to generate embeddings for.
        model (FastEmbedTextEmbeddingModel | LiteLlmEmbeddingModel | str) : The model to use for generating embeddings.
        format (bool) : Whether to format each non-string input as a markdown string.
        dimensions (Optional[int]) : The dimensions of the embedding. NOTE: LiteLLM models only
        encoding_format (Optional[str]) : The encoding format of the embedding. NOTE: LiteLLM models only
        timeout (Optional[int]) : The timeout for the embedding. NOTE: LiteLLM models only
        api_base (Optional[str]) : The base URL for the embedding API. NOTE: LiteLLM models only
        api_version (Optional[str]) : The version of the embedding API. NOTE: LiteLLM models only
        api_key (Optional[str]) : The API key for the embedding API. NOTE: LiteLLM models only
        api_type (Optional[str]) : The type of the embedding API. NOTE: LiteLLM models only
        caching (bool) : Whether to cache the embedding. NOTE: LiteLLM models only
        user (Optional[str]) : The user for the embedding. NOTE: LiteLLM models only
        parallel (Optional[int]) : The number of parallel processes to use for the embedding. NOTE: FastEmbed models only
        batch_size (Optional[int]) : The batch size to use for the embedding. NOTE: FastEmbed models only
        **kwargs : Any : Additional keyword arguments to pass to the embedding client.

    Returns:
        EmbeddingResponse : The embedding response from the embedding client.
    """

    if model.startswith("fastembed/"):
        model = model.split("fastembed/")[1]
        return await FastEmbedTextEmbeddingsClient.async_embed(
            input=input,
            model=model,
            parallel=parallel,
            batch_size=batch_size,
            format=format,
            **kwargs,
        )
    else:
        return await LiteLlmEmbeddingsClient.async_embed(
            input=input,
            model=model,
            dimensions=dimensions,
            encoding_format=encoding_format,
            timeout=timeout,
            api_base=api_base,
            api_version=api_version,
            api_key=api_key,
            api_type=api_type,
            caching=caching,
            user=user,
            format=format,
            **kwargs,
        )


def create_embeddings(
    input: List[Any] | Any,
    model: FastEmbedTextEmbeddingModel | LiteLlmEmbeddingModel | str,
    format: bool = False,
    # LiteLLM Settings
    dimensions: Optional[int] = None,
    encoding_format: Optional[str] = None,
    timeout: Optional[int] = None,
    api_base: Optional[str] = None,
    api_version: Optional[str] = None,
    api_key: Optional[str] = None,
    api_type: Optional[str] = None,
    caching: bool = False,
    user: Optional[str] = None,
    # FastEmbed Settings
    parallel: Optional[int] = None,
    batch_size: Optional[int] = None,
    **kwargs: Any,
) -> EmbeddingResponse:
    """Asynchronously create embeddings for the given input using the specified model.

    Args:
        input (List[Any] | Any) : The input text / content to generate embeddings for.
        model (FastEmbedTextEmbeddingModel | LiteLlmEmbeddingModel | str) : The model to use for generating embeddings.
        format (bool) : Whether to format each non-string input as a markdown string.
        dimensions (Optional[int]) : The dimensions of the embedding. NOTE: LiteLLM models only
        encoding_format (Optional[str]) : The encoding format of the embedding. NOTE: LiteLLM models only
        timeout (Optional[int]) : The timeout for the embedding. NOTE: LiteLLM models only
        api_base (Optional[str]) : The base URL for the embedding API. NOTE: LiteLLM models only
        api_version (Optional[str]) : The version of the embedding API. NOTE: LiteLLM models only
        api_key (Optional[str]) : The API key for the embedding API. NOTE: LiteLLM models only
        api_type (Optional[str]) : The type of the embedding API. NOTE: LiteLLM models only
        caching (bool) : Whether to cache the embedding. NOTE: LiteLLM models only
        user (Optional[str]) : The user for the embedding. NOTE: LiteLLM models only
        parallel (Optional[int]) : The number of parallel processes to use for the embedding. NOTE: FastEmbed models only
        batch_size (Optional[int]) : The batch size to use for the embedding. NOTE: FastEmbed models only
        **kwargs : Any : Additional keyword arguments to pass to the embedding client.

    Returns:
        EmbeddingResponse : The embedding response from the embedding client.
    """

    if model.startswith("fastembed/"):
        model = model.split("fastembed/")[1]
        return FastEmbedTextEmbeddingsClient.embed(
            input=input,
            model=model,
            parallel=parallel,
            batch_size=batch_size,
            format=format,
            **kwargs,
        )
    else:
        return LiteLlmEmbeddingsClient.embed(
            input=input,
            model=model,
            dimensions=dimensions,
            encoding_format=encoding_format,
            timeout=timeout,
            api_base=api_base,
            api_version=api_version,
            api_key=api_key,
            api_type=api_type,
            caching=caching,
            user=user,
            format=format,
            **kwargs,
        )
```
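Both entry points dispatch on the model string: a `fastembed/` prefix is stripped and routed to `FastEmbedTextEmbeddingsClient`, while any other model string falls through to `LiteLlmEmbeddingsClient`. (Note that the synchronous `create_embeddings` carries a docstring beginning "Asynchronously", apparently copied from the async variant.) A hedged sketch of both paths, where the concrete model names are assumptions:

```python
# Hedged dispatch sketch; the model names below are examples, not
# values taken from the diff.
from hammad.ai.embeddings.create import create_embeddings

# "fastembed/" prefix -> local FastEmbed client (prefix stripped first).
local = create_embeddings(
    input="some text",
    model="fastembed/BAAI/bge-small-en-v1.5",
    batch_size=32,
)

# Any other model string -> litellm-backed client.
remote = create_embeddings(
    input="some text",
    model="text-embedding-3-small",
    caching=True,
)
print(local.dimensions, remote.dimensions)
```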
hammad/ai/embeddings/types.py
ADDED
@@ -0,0 +1,69 @@

```python
"""hammad.ai.embeddings.types"""

from typing import List, Literal

from ...base.model import Model

__all__ = (
    "Embedding",
    "EmbeddingUsage",
    "EmbeddingResponse",
)


class Embedding(Model):
    embedding: List[float]
    """The embedding vector, which is a list of floats.

    The length of vector depends on the model as listed in the
    [embedding guide](https://platform.openai.com/docs/guides/embeddings).
    """

    index: int
    """The index of the embedding in the list of embeddings."""

    object: Literal["embedding"]
    """The object type, which is always "embedding"."""

    @property
    def dimensions(self) -> int:
        """The dimensions of the embedding."""
        return len(self.embedding)


class EmbeddingUsage(Model):
    """Usage statistics for embedding requests."""

    prompt_tokens: int
    """The number of tokens used by the prompt."""

    total_tokens: int
    """The total number of tokens used by the request."""


class EmbeddingResponse(Model):
    data: List[Embedding]
    """The list of embeddings generated by the model."""

    model: str
    """The name of the model used to generate the embedding."""

    object: Literal["list"]
    """The object type, which is always "list"."""

    usage: EmbeddingUsage
    """The usage information for the request."""

    @property
    def dimensions(self) -> int:
        """The dimensions of the embedding."""
        return len(self.data[0].embedding)

    def __str__(self) -> str:
        return (
            "Embedding Response:\n"
            f">>> Model: {self.model}\n"
            f">>> Dimensions: {self.dimensions}\n"
            f">>> Usage: {self.usage}\n"
            f">>> Number of Generated Embeddings: {len(self.data)}\n"
        )
```
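These response types are plain `Model` subclasses, so they can be constructed directly. A small sketch (values invented for illustration) showing the derived `dimensions` property and the `__str__` summary:

```python
# Minimal sketch constructing the types by hand to show their shape;
# the vector values here are made up for illustration.
from hammad.ai.embeddings.types import (
    Embedding,
    EmbeddingUsage,
    EmbeddingResponse,
)

response = EmbeddingResponse(
    data=[Embedding(embedding=[0.1, 0.2, 0.3], index=0, object="embedding")],
    model="text-embedding-3-small",
    object="list",
    usage=EmbeddingUsage(prompt_tokens=2, total_tokens=2),
)
assert response.dimensions == 3  # derived from the first vector's length
print(response)  # rendered via the __str__ summary defined above
```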
hammad/base/__init__.py
ADDED
@@ -0,0 +1,35 @@

```python
"""hammad.base

Contains the `Model` and `field` system along with an assortment
of various utilities for interacting and managing these objects.
"""

from typing import TYPE_CHECKING
from .._core._utils._import_utils import _auto_create_getattr_loader

if TYPE_CHECKING:
    from .model import Model, model_settings
    from .fields import field, Field, FieldInfo
    from .utils import create_model, validator, is_field, is_model, get_field_info

__all__ = (
    # hammad.models.model
    "Model",
    "model_settings",
    # hammad.models.fields
    "field",
    "Field",
    "FieldInfo",
    # hammad.models.utils
    "create_model",
    "validator",
    "is_field",
    "is_model",
    "get_field_info",
)

__getattr__ = _auto_create_getattr_loader(__all__)


def __dir__() -> list[str]:
    return list(__all__)
```
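This `__init__` imports its public names only under `TYPE_CHECKING` and installs a module-level `__getattr__` (PEP 562), so submodules are loaded lazily on first attribute access. `_auto_create_getattr_loader` itself lives in the private `_import_utils.py` added elsewhere in this release and is not shown here; the sketch below is only an illustration of the general pattern it appears to implement, with a hypothetical export map:

```python
# Illustrative PEP 562 lazy-loading pattern, NOT the library's actual
# implementation; _EXPORTS is a hypothetical name-to-submodule map.
import importlib

_EXPORTS = {"Model": ".model", "field": ".fields"}

def __getattr__(name: str):
    if name in _EXPORTS:
        module = importlib.import_module(_EXPORTS[name], __package__)
        return getattr(module, name)  # import deferred until first access
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
```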
hammad/{based → base}/fields.py
RENAMED

```diff
@@ -1,4 +1,4 @@
-"""hammad.based.fields"""
+"""hammad.base.fields"""
 
 import re
 from dataclasses import dataclass
@@ -9,18 +9,18 @@ import msgspec
 from msgspec import field as msgspec_field
 
 __all__ = (
-    "BasedFieldInfo",
-    "basedfield",
-    "BasedField",
-    "str_basedfield",
-    "int_basedfield",
-    "float_basedfield",
-    "list_basedfield",
+    "FieldInfo",
+    "field",
+    "Field",
+    "str_field",
+    "int_field",
+    "float_field",
+    "list_field",
 )
 
 
 @dataclass(frozen=True, slots=True)
-class BasedFieldInfo:
+class FieldInfo:
     """Immutable field information container optimized for performance.
 
     Uses frozen dataclass with slots for memory efficiency and faster attribute access.
@@ -294,14 +294,14 @@ class BasedFieldInfo:
         return schema
 
 
-class BasedField:
+class Field:
     """Field descriptor that combines msgspec.field with FieldInfo metadata.
 
     This class wraps msgspec's field functionality while preserving our
     extended metadata for validation and serialization.
     """
 
-    def __init__(self, field_info: BasedFieldInfo):
+    def __init__(self, field_info: FieldInfo):
         self.field_info = field_info
         self._msgspec_field = None
 
@@ -330,7 +330,7 @@ class BasedField:
         return f"Field({self.field_info})"
 
 
-def basedfield(
+def field(
     default: Any = msgspec.UNSET,
     *,
     default_factory: Optional[Callable[[], Any]] = None,
@@ -370,7 +370,7 @@ def basedfield(
     pre_validators: Optional[List[Callable[[Any], Any]]] = None,
     post_validators: Optional[List[Callable[[Any], Any]]] = None,
 ) -> Any:
-    """Create a field descriptor for BasedModel with Pydantic-like configuration.
+    """Create a field descriptor for Model with Pydantic-like configuration.
 
     This function creates a field with validation, serialization, and schema
     generation capabilities while maintaining msgspec's performance benefits.
@@ -415,10 +415,10 @@ def basedfield(
         post_validators: List of post-processing validators
 
     Returns:
-        Field descriptor or Annotated type with metadata for use with BasedModel
+        Field descriptor or Annotated type with metadata for use with Model
     """
     # Store field info for potential future use (validation, schema generation, etc.)
-    info = BasedFieldInfo(
+    info = FieldInfo(
         default=default,
         default_factory=default_factory,
         alias=alias,
@@ -472,7 +472,7 @@ def basedfield(
     return msgspec_field_instance
 
 
-def str_basedfield(
+def str_field(
     *,
     min_length: Optional[int] = None,
     max_length: Optional[int] = None,
@@ -483,7 +483,7 @@ def str_basedfield(
     **kwargs,
 ) -> Any:
     """Create a string field with common string-specific options."""
-    return basedfield(
+    return field(
         min_length=min_length,
         max_length=max_length,
         pattern=pattern,
@@ -494,7 +494,7 @@ def str_basedfield(
     )
 
 
-def int_basedfield(
+def int_field(
     *,
     gt: Optional[int] = None,
     ge: Optional[int] = None,
@@ -504,10 +504,10 @@ def int_basedfield(
     **kwargs,
 ) -> Any:
     """Create an integer field with numeric constraints."""
-    return basedfield(gt=gt, ge=ge, lt=lt, le=le, multiple_of=multiple_of, **kwargs)
+    return field(gt=gt, ge=ge, lt=lt, le=le, multiple_of=multiple_of, **kwargs)
 
 
-def float_basedfield(
+def float_field(
     *,
     gt: Optional[float] = None,
     ge: Optional[float] = None,
@@ -518,7 +518,7 @@ def float_basedfield(
     **kwargs,
 ) -> Any:
     """Create a float field with numeric constraints."""
-    return basedfield(
+    return field(
         gt=gt,
         ge=ge,
         lt=lt,
@@ -529,7 +529,7 @@ def float_basedfield(
     )
 
 
-def list_basedfield(
+def list_field(
     *,
     min_length: Optional[int] = None,
     max_length: Optional[int] = None,
@@ -537,7 +537,7 @@ def list_basedfield(
     **kwargs,
 ) -> Any:
     """Create a list field with collection constraints."""
-    return basedfield(
+    return field(
         default_factory=list,
         min_length=min_length,
         max_length=max_length,
```