langroid 0.2.12__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,6 +24,8 @@ class EmbeddingModel(ABC):
24
24
  @classmethod
25
25
  def create(cls, config: EmbeddingModelsConfig) -> "EmbeddingModel":
26
26
  from langroid.embedding_models.models import (
27
+ FastEmbedEmbeddings,
28
+ FastEmbedEmbeddingsConfig,
27
29
  OpenAIEmbeddings,
28
30
  OpenAIEmbeddingsConfig,
29
31
  SentenceTransformerEmbeddings,
@@ -40,6 +42,8 @@ class EmbeddingModel(ABC):
40
42
  return OpenAIEmbeddings(config)
41
43
  elif isinstance(config, SentenceTransformerEmbeddingsConfig):
42
44
  return SentenceTransformerEmbeddings(config)
45
+ elif isinstance(config, FastEmbedEmbeddingsConfig):
46
+ return FastEmbedEmbeddings(config)
43
47
  else:
44
48
  raise ValueError(f"Unknown embedding config: {config.__repr_name__}")
45
49
 
@@ -1,12 +1,14 @@
1
1
  import atexit
2
2
  import os
3
- from typing import Callable, List, Optional
3
+ from functools import cached_property
4
+ from typing import Any, Callable, Dict, List, Optional
4
5
 
5
6
  import tiktoken
6
7
  from dotenv import load_dotenv
7
8
  from openai import OpenAI
8
9
 
9
10
  from langroid.embedding_models.base import EmbeddingModel, EmbeddingModelsConfig
11
+ from langroid.exceptions import LangroidImportError
10
12
  from langroid.mytypes import Embeddings
11
13
  from langroid.parsing.utils import batched
12
14
 
@@ -32,6 +34,20 @@ class SentenceTransformerEmbeddingsConfig(EmbeddingModelsConfig):
32
34
  devices: Optional[list[str]] = None
33
35
 
34
36
 
37
class FastEmbedEmbeddingsConfig(EmbeddingModelsConfig):
    """Settings for embeddings computed with qdrant's ``fastembed`` library.

    Project page: https://github.com/qdrant/fastembed
    """

    # Tag naming this embedding backend.
    model_type: str = "fastembed"
    # Model identifier handed to ``TextEmbedding(model_name=...)``.
    model_name: str = "BAAI/bge-small-en-v1.5"
    # Passed as ``batch_size`` on every ``embed`` call.
    batch_size: int = 256
    # Passed through unchanged as ``cache_dir`` when the model is constructed.
    cache_dir: Optional[str] = None
    # Passed through unchanged as ``threads`` when the model is constructed.
    threads: Optional[int] = None
    # Passed through unchanged as ``parallel`` on every ``embed`` call.
    parallel: Optional[int] = None
    # Extra keyword arguments splatted into the ``TextEmbedding`` constructor.
    additional_kwargs: Dict[str, Any] = {}
49
+
50
+
35
51
  class EmbeddingFunctionCallable:
36
52
  """
37
53
  A callable class designed to generate embeddings for a list of texts using
@@ -189,6 +205,41 @@ class SentenceTransformerEmbeddings(EmbeddingModel):
189
205
  return dims # type: ignore
190
206
 
191
207
 
208
class FastEmbedEmbeddings(EmbeddingModel):
    """Embedding model backed by ``fastembed``'s ``TextEmbedding``."""

    def __init__(self, config: Optional[FastEmbedEmbeddingsConfig] = None):
        """Load the fastembed model described by ``config``.

        Args:
            config: Model settings. When omitted, a fresh
                ``FastEmbedEmbeddingsConfig()`` is created for this instance.

        Raises:
            LangroidImportError: If the ``fastembed`` package cannot be
                imported.
        """
        # Imported lazily because fastembed is an optional extra.
        try:
            from fastembed import TextEmbedding
        except ImportError as err:
            raise LangroidImportError("fastembed", extra="fastembed") from err

        super().__init__()
        # Build the default here rather than in the signature: a default
        # evaluated at definition time is a single config object shared by
        # every instance constructed without arguments.
        self.config = FastEmbedEmbeddingsConfig() if config is None else config
        self._batch_size = self.config.batch_size
        self._parallel = self.config.parallel

        self._model = TextEmbedding(
            model_name=self.config.model_name,
            cache_dir=self.config.cache_dir,
            threads=self.config.threads,
            **self.config.additional_kwargs,
        )

    def embedding_fn(self) -> Callable[[List[str]], Embeddings]:
        """Return a callable that embeds a list of texts.

        Returns:
            A function taking a list of strings and returning one embedding
            per input, each converted with ``.tolist()``.
        """

        def fn(texts: List[str]) -> Embeddings:
            embeddings = self._model.embed(
                texts, batch_size=self._batch_size, parallel=self._parallel
            )

            return [embedding.tolist() for embedding in embeddings]

        return fn

    @cached_property
    def embedding_dims(self) -> int:
        """Length of one embedding vector.

        Found by embedding the probe string ``"text"`` and measuring the
        result; ``cached_property`` stores the value on the instance so the
        model is only run on first access.
        """
        embed_func = self.embedding_fn()
        return len(embed_func(["text"])[0])
241
+
242
+
192
243
  def embedding_model(embedding_fn_type: str = "openai") -> EmbeddingModel:
193
244
  """
194
245
  Args:
@@ -198,5 +249,7 @@ def embedding_model(embedding_fn_type: str = "openai") -> EmbeddingModel:
198
249
  """
199
250
  if embedding_fn_type == "openai":
200
251
  return OpenAIEmbeddings # type: ignore
252
+ elif embedding_fn_type == "fastembed":
253
+ return FastEmbedEmbeddings # type: ignore
201
254
  else: # default sentence transformer
202
255
  return SentenceTransformerEmbeddings # type: ignore
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: langroid
3
- Version: 0.2.12
3
+ Version: 0.3.0
4
4
  Summary: Harness LLMs with Multi-Agent Programming
5
5
  License: MIT
6
6
  Author: Prasad Chalasani
7
7
  Author-email: pchalasani@gmail.com
8
- Requires-Python: >=3.10,<4.0
8
+ Requires-Python: >=3.10,<3.13
9
9
  Classifier: License :: OSI Approved :: MIT License
10
10
  Classifier: Programming Language :: Python :: 3
11
11
  Classifier: Programming Language :: Python :: 3.10
@@ -17,6 +17,7 @@ Provides-Extra: chromadb
17
17
  Provides-Extra: db
18
18
  Provides-Extra: doc-chat
19
19
  Provides-Extra: docx
20
+ Provides-Extra: fastembed
20
21
  Provides-Extra: hf-embeddings
21
22
  Provides-Extra: hf-transformers
22
23
  Provides-Extra: lancedb
@@ -44,6 +45,7 @@ Requires-Dist: docstring-parser (>=0.15,<0.16)
44
45
  Requires-Dist: duckduckgo-search (>=6.0.0,<7.0.0)
45
46
  Requires-Dist: faker (>=18.9.0,<19.0.0)
46
47
  Requires-Dist: fakeredis (>=2.12.1,<3.0.0)
48
+ Requires-Dist: fastembed (>=0.3.1,<0.4.0) ; extra == "all" or extra == "fastembed"
47
49
  Requires-Dist: fire (>=0.5.0,<0.6.0)
48
50
  Requires-Dist: google-api-python-client (>=2.95.0,<3.0.0)
49
51
  Requires-Dist: google-generativeai (>=0.5.2,<0.6.0)
@@ -52,9 +52,9 @@ langroid/cachedb/base.py,sha256=ztVjB1DtN6pLCujCWnR6xruHxwVj3XkYniRTYAKKqk0,1354
52
52
  langroid/cachedb/momento_cachedb.py,sha256=YEOJ62hEcV6iIeMr5aGgRYgWQqFYaej9gEDEcY0sm7M,3172
53
53
  langroid/cachedb/redis_cachedb.py,sha256=7kgnbf4b5CKsCrlL97mHWKvdvlLt8zgn7lc528jEpiE,5141
54
54
  langroid/embedding_models/__init__.py,sha256=lsu8qxCjfGujXGueJWU-VI3LMZYGjLSYgqUKDd4F3Qo,715
55
- langroid/embedding_models/base.py,sha256=MSjaTkFcfoMGY6SHPOqAsbZbKctj8-1N6zgaFYmOFTg,1830
55
+ langroid/embedding_models/base.py,sha256=3dK0nW3XNjK3Vyh2kxhIffzDuUYumVVkCIimB3UPHeU,2009
56
56
  langroid/embedding_models/clustering.py,sha256=tZWElUqXl9Etqla0FAa7og96iDKgjqWjucZR_Egtp-A,6684
57
- langroid/embedding_models/models.py,sha256=-xeN0irBPc1tUgRFHGM1ki4NwOIHr6F3SKuEjD5nTOg,7144
57
+ langroid/embedding_models/models.py,sha256=NQ1Cfw5MmeR69fzthIWjnkSAQuWcxnUecS5z_U2IPNs,8900
58
58
  langroid/embedding_models/protoc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
59
59
  langroid/embedding_models/protoc/embeddings.proto,sha256=_O-SgFpTaylQeOTgSpxhEJ7CUw7PeCQQJLaPqpPYKJg,321
60
60
  langroid/embedding_models/protoc/embeddings_pb2.py,sha256=4Q57PhOunv-uZNJrxYrWBXAI0ZtfnVZXFRhRj5JuRSg,1662
@@ -129,8 +129,8 @@ langroid/vector_store/meilisearch.py,sha256=6frB7GFWeWmeKzRfLZIvzRjllniZ1cYj3Hmh
129
129
  langroid/vector_store/momento.py,sha256=QaPzUnTwlswoawGB-paLtUPyLRvckFXLfLDfvbTzjNQ,10505
130
130
  langroid/vector_store/qdrant_cloud.py,sha256=3im4Mip0QXLkR6wiqVsjV1QvhSElfxdFSuDKddBDQ-4,188
131
131
  langroid/vector_store/qdrantdb.py,sha256=wYOuu5c2vIKn9ZgvTXcAiZXMpV8AOXEWFAzI8S8UP-0,16828
132
- pyproject.toml,sha256=IQC1A8fpg0cKWje_Gt3YxZpRQw9ie1xlJ8TaiS4Vr3w,6960
133
- langroid-0.2.12.dist-info/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
134
- langroid-0.2.12.dist-info/METADATA,sha256=66JdF6SmaH5yisjOpT6dgYsjXAG7FbnD8OaTAmVnV7I,54101
135
- langroid-0.2.12.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
136
- langroid-0.2.12.dist-info/RECORD,,
132
+ pyproject.toml,sha256=gh7X6Y1IgIluL77xY0ejjIJMOIXBPw2-oLYcXUg01Ng,7054
133
+ langroid-0.3.0.dist-info/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
134
+ langroid-0.3.0.dist-info/METADATA,sha256=MUh8_clSRdnwAFTiu8ESCAyXKNkFbU59uXvthr6r2CI,54210
135
+ langroid-0.3.0.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
136
+ langroid-0.3.0.dist-info/RECORD,,
pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "langroid"
3
- version = "0.2.12"
3
+ version = "0.3.0"
4
4
  description = "Harness LLMs with Multi-Agent Programming"
5
5
  authors = ["Prasad Chalasani <pchalasani@gmail.com>"]
6
6
  readme = "README.md"
@@ -10,7 +10,7 @@ include = ["pyproject.toml"]
10
10
 
11
11
  # =============== MAIN DEPS ==============
12
12
  [tool.poetry.dependencies]
13
- python = "^3.10"
13
+ python = ">=3.10,<3.13"
14
14
 
15
15
  # =========== OPTIONALS ==============================
16
16
  chromadb = {version=">=0.4.21, <=0.4.23", optional=true}
@@ -39,6 +39,7 @@ pyarrow = {version="15.0.0", optional=true}
39
39
  pdfplumber = {version="^0.10.2", optional=true}
40
40
  python-docx = {version="^1.1.0", optional=true}
41
41
  scrapy = {version="^2.11.0", optional=true}
42
+ fastembed = {version="^0.3.1", optional=true}
42
43
 
43
44
  # ====CORE================================
44
45
  pyyaml = "^6.0.1"
@@ -111,6 +112,7 @@ all = [
111
112
  "metaphor-python", "neo4j",
112
113
  "litellm",
113
114
  "chainlit", "python-socketio",
115
+ "fastembed"
114
116
  ]
115
117
  # more granular groupings
116
118
  lancedb = ["lancedb", "tantivy", "pyarrow"]
@@ -135,6 +137,7 @@ mkdocs = [
135
137
  ]
136
138
  meilisearch = ["meilisearch-python-sdk"]
137
139
  momento = ["momento"]
140
+ fastembed = ["fastembed"]
138
141
 
139
142
 
140
143
  # ================= DEV DEPS =================