ai-parrot 0.8.3__cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ai-parrot might be problematic. Click here for more details.

Files changed (128) hide show
  1. ai_parrot-0.8.3.dist-info/LICENSE +21 -0
  2. ai_parrot-0.8.3.dist-info/METADATA +306 -0
  3. ai_parrot-0.8.3.dist-info/RECORD +128 -0
  4. ai_parrot-0.8.3.dist-info/WHEEL +6 -0
  5. ai_parrot-0.8.3.dist-info/top_level.txt +2 -0
  6. parrot/__init__.py +30 -0
  7. parrot/bots/__init__.py +5 -0
  8. parrot/bots/abstract.py +1115 -0
  9. parrot/bots/agent.py +492 -0
  10. parrot/bots/basic.py +9 -0
  11. parrot/bots/bose.py +17 -0
  12. parrot/bots/chatbot.py +271 -0
  13. parrot/bots/cody.py +17 -0
  14. parrot/bots/copilot.py +117 -0
  15. parrot/bots/data.py +730 -0
  16. parrot/bots/dataframe.py +103 -0
  17. parrot/bots/hrbot.py +15 -0
  18. parrot/bots/interfaces/__init__.py +1 -0
  19. parrot/bots/interfaces/retrievers.py +12 -0
  20. parrot/bots/notebook.py +619 -0
  21. parrot/bots/odoo.py +17 -0
  22. parrot/bots/prompts/__init__.py +41 -0
  23. parrot/bots/prompts/agents.py +91 -0
  24. parrot/bots/prompts/data.py +214 -0
  25. parrot/bots/retrievals/__init__.py +1 -0
  26. parrot/bots/retrievals/constitutional.py +19 -0
  27. parrot/bots/retrievals/multi.py +122 -0
  28. parrot/bots/retrievals/retrieval.py +610 -0
  29. parrot/bots/tools/__init__.py +7 -0
  30. parrot/bots/tools/eda.py +325 -0
  31. parrot/bots/tools/pdf.py +50 -0
  32. parrot/bots/tools/plot.py +48 -0
  33. parrot/bots/troc.py +16 -0
  34. parrot/conf.py +170 -0
  35. parrot/crew/__init__.py +3 -0
  36. parrot/crew/tools/__init__.py +22 -0
  37. parrot/crew/tools/bing.py +13 -0
  38. parrot/crew/tools/config.py +43 -0
  39. parrot/crew/tools/duckgo.py +62 -0
  40. parrot/crew/tools/file.py +24 -0
  41. parrot/crew/tools/google.py +168 -0
  42. parrot/crew/tools/gtrends.py +16 -0
  43. parrot/crew/tools/md2pdf.py +25 -0
  44. parrot/crew/tools/rag.py +42 -0
  45. parrot/crew/tools/search.py +32 -0
  46. parrot/crew/tools/url.py +21 -0
  47. parrot/exceptions.cpython-39-x86_64-linux-gnu.so +0 -0
  48. parrot/handlers/__init__.py +4 -0
  49. parrot/handlers/agents.py +292 -0
  50. parrot/handlers/bots.py +196 -0
  51. parrot/handlers/chat.py +192 -0
  52. parrot/interfaces/__init__.py +6 -0
  53. parrot/interfaces/database.py +27 -0
  54. parrot/interfaces/http.py +805 -0
  55. parrot/interfaces/images/__init__.py +0 -0
  56. parrot/interfaces/images/plugins/__init__.py +18 -0
  57. parrot/interfaces/images/plugins/abstract.py +58 -0
  58. parrot/interfaces/images/plugins/exif.py +709 -0
  59. parrot/interfaces/images/plugins/hash.py +52 -0
  60. parrot/interfaces/images/plugins/vision.py +104 -0
  61. parrot/interfaces/images/plugins/yolo.py +66 -0
  62. parrot/interfaces/images/plugins/zerodetect.py +197 -0
  63. parrot/llms/__init__.py +1 -0
  64. parrot/llms/abstract.py +69 -0
  65. parrot/llms/anthropic.py +58 -0
  66. parrot/llms/gemma.py +15 -0
  67. parrot/llms/google.py +44 -0
  68. parrot/llms/groq.py +67 -0
  69. parrot/llms/hf.py +45 -0
  70. parrot/llms/openai.py +61 -0
  71. parrot/llms/pipes.py +114 -0
  72. parrot/llms/vertex.py +89 -0
  73. parrot/loaders/__init__.py +9 -0
  74. parrot/loaders/abstract.py +628 -0
  75. parrot/loaders/files/__init__.py +0 -0
  76. parrot/loaders/files/abstract.py +39 -0
  77. parrot/loaders/files/text.py +63 -0
  78. parrot/loaders/txt.py +26 -0
  79. parrot/manager.py +333 -0
  80. parrot/models.py +504 -0
  81. parrot/py.typed +0 -0
  82. parrot/stores/__init__.py +11 -0
  83. parrot/stores/abstract.py +248 -0
  84. parrot/stores/chroma.py +188 -0
  85. parrot/stores/duck.py +162 -0
  86. parrot/stores/embeddings/__init__.py +10 -0
  87. parrot/stores/embeddings/abstract.py +46 -0
  88. parrot/stores/embeddings/base.py +52 -0
  89. parrot/stores/embeddings/bge.py +20 -0
  90. parrot/stores/embeddings/fastembed.py +17 -0
  91. parrot/stores/embeddings/google.py +18 -0
  92. parrot/stores/embeddings/huggingface.py +20 -0
  93. parrot/stores/embeddings/ollama.py +14 -0
  94. parrot/stores/embeddings/openai.py +26 -0
  95. parrot/stores/embeddings/transformers.py +21 -0
  96. parrot/stores/embeddings/vertexai.py +17 -0
  97. parrot/stores/empty.py +10 -0
  98. parrot/stores/faiss.py +160 -0
  99. parrot/stores/milvus.py +397 -0
  100. parrot/stores/postgres.py +653 -0
  101. parrot/stores/qdrant.py +170 -0
  102. parrot/tools/__init__.py +23 -0
  103. parrot/tools/abstract.py +68 -0
  104. parrot/tools/asknews.py +33 -0
  105. parrot/tools/basic.py +51 -0
  106. parrot/tools/bby.py +359 -0
  107. parrot/tools/bing.py +13 -0
  108. parrot/tools/docx.py +343 -0
  109. parrot/tools/duck.py +62 -0
  110. parrot/tools/execute.py +56 -0
  111. parrot/tools/gamma.py +28 -0
  112. parrot/tools/google.py +170 -0
  113. parrot/tools/gvoice.py +301 -0
  114. parrot/tools/results.py +278 -0
  115. parrot/tools/stack.py +27 -0
  116. parrot/tools/weather.py +70 -0
  117. parrot/tools/wikipedia.py +58 -0
  118. parrot/tools/zipcode.py +198 -0
  119. parrot/utils/__init__.py +2 -0
  120. parrot/utils/parsers/__init__.py +5 -0
  121. parrot/utils/parsers/toml.cpython-39-x86_64-linux-gnu.so +0 -0
  122. parrot/utils/toml.py +11 -0
  123. parrot/utils/types.cpython-39-x86_64-linux-gnu.so +0 -0
  124. parrot/utils/uv.py +11 -0
  125. parrot/version.py +10 -0
  126. resources/users/__init__.py +5 -0
  127. resources/users/handlers.py +13 -0
  128. resources/users/models.py +205 -0
@@ -0,0 +1,248 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import List, Union
3
+ import importlib
4
+ from collections.abc import Callable
5
+ from langchain.docstore.document import Document
6
+ from langchain_core.vectorstores import VectorStoreRetriever
7
+ from navconfig.logging import logging
8
+ from ..conf import (
9
+ EMBEDDING_DEFAULT_MODEL
10
+ )
11
+ from ..exceptions import ConfigError # pylint: disable=E0611
12
+ from .embeddings import supported_embeddings
13
+
14
+
15
class AbstractStore(ABC):
    """Base class for all database vector stores.

    Supported Vector Stores:
        - Qdrant
        - Milvus
        - Faiss
        - Chroma
        - PgVector

    Args:
        embedding_model (Union[dict, str]): Embedding configuration. A string
            is taken as a model name of type ``huggingface``; a dictionary is
            used as-is (``create_embedding`` reads its ``model_name`` and
            ``model_type`` keys).
        embedding (Union[dict, str, Callable]): When it is a string or a
            dictionary it is handled like ``embedding_model`` and takes
            precedence over it. Any other object is used directly as the
            embedding instance of the store.
        kwargs: Optional settings: ``use_database``, ``collection_name``,
            ``dimension``, ``metric_type``, ``index_type``, ``database`` and
            ``index_name``.

    Raises:
        ValueError: If ``embedding_model`` is neither a string nor a
            dictionary.
    """

    def __init__(
        self,
        embedding_model: Union[dict, str] = None,
        embedding: Union[dict, str, Callable] = None,
        **kwargs
    ):
        self.client: Callable = None
        self.vector: Callable = None
        self._embed_: Callable = None
        self._connected: bool = False
        # Always define the attribute: without it, building a store with no
        # embedding arguments failed with AttributeError further below.
        self.embedding_model: dict = {
            'model_name': EMBEDDING_DEFAULT_MODEL,
            'model_type': 'huggingface'
        }
        if embedding_model is not None:
            if isinstance(embedding_model, str):
                self.embedding_model = {
                    'model_name': embedding_model,
                    'model_type': 'huggingface'
                }
            elif isinstance(embedding_model, dict):
                self.embedding_model = embedding_model
            else:
                raise ValueError(
                    "Embedding Model must be a string or a dictionary."
                )
        # Use or not connection to a vector database:
        self._use_database: bool = kwargs.get('use_database', True)
        # Database Information:
        self.collection_name: str = kwargs.get('collection_name', 'my_collection')
        self.dimension: int = kwargs.get("dimension", 768)
        self._metric_type: str = kwargs.get("metric_type", 'COSINE')
        self._index_type: str = kwargs.get("index_type", 'IVF_FLAT')
        self.database: str = kwargs.get('database', '')
        self.index_name = kwargs.get("index_name", "my_index")
        if embedding is not None:
            if isinstance(embedding, str):
                self.embedding_model = {
                    'model_name': embedding,
                    'model_type': 'huggingface'
                }
            elif isinstance(embedding, dict):
                self.embedding_model = embedding
            else:
                # An already-built embedding object: keep it and describe it
                # with the default model configuration.
                self.embedding_model = {
                    'model_name': EMBEDDING_DEFAULT_MODEL,
                    'model_type': 'huggingface'
                }
                self._embed_ = embedding
        self.logger = logging.getLogger(
            f"Store.{__name__}"
        )
        # Client Connection (if required):
        self._connection = None
        # Create the Embedding Model, unless the caller supplied one
        # (previously the supplied object was silently overwritten here).
        if self._embed_ is None:
            self._embed_ = self.create_embedding(
                embedding_model=self.embedding_model
            )

    @property
    def connected(self) -> bool:
        """Whether the store is flagged as connected."""
        return self._connected

    def is_connected(self):
        """Method counterpart of the ``connected`` property."""
        return self._connected

    @abstractmethod
    async def connection(self) -> tuple:
        """Open the connection to the underlying vector database."""

    @abstractmethod
    async def disconnect(self) -> None:
        """Close the connection to the underlying vector database."""

    # Async Context Manager
    async def __aenter__(self):
        """Connect on entering the context when a database is in use."""
        if self._use_database and not self._connection:
            await self.connection()
        return self

    async def _free_resources(self):
        """Release the embedding object, calling its ``free()`` if present."""
        release = getattr(self._embed_, 'free', None)
        if callable(release):
            release()
        self._embed_ = None

    async def __aexit__(self, exc_type, exc_value, traceback):
        """Disconnect on leaving the context.

        A ``RuntimeError`` raised by ``disconnect()`` is logged and
        suppressed so it does not mask an exception coming from the body of
        the ``async with`` block.
        """
        try:
            await self.disconnect()
        except RuntimeError as err:
            self.logger.warning("Error closing the store connection: %s", err)

    @abstractmethod
    def get_vector(self, metric_type: str = None, **kwargs):
        """Return the vector store object of the concrete backend."""

    def get_vectorstore(self):
        """Return ``get_vector()`` called with its default arguments."""
        return self.get_vector()

    @abstractmethod
    async def similarity_search(
        self,
        query: str,
        collection: Union[str, None] = None,
        limit: int = 2
    ) -> list:  # noqa
        """Search the store for entries similar to ``query``.

        Args:
            query (str): Text to search for.
            collection (str): Collection Name.
            limit (int): Maximum number of results.

        Returns:
            list: Matching entries.
        """

    @abstractmethod
    async def from_documents(
        self,
        documents: List[Document],
        collection: Union[str, None] = None,
        **kwargs
    ) -> Callable:
        """
        Create Vector Store from Documents.

        Args:
            documents (List[Document]): List of Documents.
            collection (str): Collection Name.
            kwargs: Additional Arguments.

        Returns:
            Callable VectorStore.
        """

    @abstractmethod
    async def add_documents(
        self,
        documents: List[Document],
        collection: Union[str, None] = None,
        **kwargs
    ) -> None:
        """
        Add Documents to Vector Store.

        Args:
            documents (List[Document]): List of Documents.
            collection (str): Collection Name.
            kwargs: Additional Arguments.

        Returns:
            None.
        """

    def create_embedding(
        self,
        embedding_model: dict,
        **kwargs
    ):
        """
        Create Embedding Model.

        The ``model_type`` entry selects a class name from
        ``supported_embeddings``; that class is loaded from the sibling
        module ``.embeddings.<model_type>`` and instantiated with
        ``model_name``.

        Args:
            embedding_model (dict): Embedding Model Configuration
                (``model_type`` defaults to ``huggingface`` and
                ``model_name`` to ``EMBEDDING_DEFAULT_MODEL``).
            kwargs: Additional Arguments, forwarded to the embedding class.

        Returns:
            Callable: Embedding Model.

        Raises:
            ConfigError: If the model type is not supported or an
                ImportError occurs while loading or building it.
        """
        model_type = embedding_model.get('model_type', 'huggingface')
        model_name = embedding_model.get('model_name', EMBEDDING_DEFAULT_MODEL)
        if model_type not in supported_embeddings:
            raise ConfigError(
                f"Embedding Model Type: {model_type} not supported."
            )
        embed_cls = supported_embeddings[model_type]
        cls_path = f".embeddings.{model_type}"  # Relative module path
        try:
            embed_module = importlib.import_module(
                cls_path,
                package=__package__
            )
            embed_obj = getattr(embed_module, embed_cls)
            return embed_obj(
                model_name=model_name,
                **kwargs
            )
        except ImportError as e:
            raise ConfigError(
                f"Error Importing Embedding Model: {model_type}"
            ) from e

    def get_default_embedding(self):
        """Create the ``huggingface`` embedding for ``EMBEDDING_DEFAULT_MODEL``."""
        embed_model = {
            'model_name': EMBEDDING_DEFAULT_MODEL,
            'model_type': 'huggingface'
        }
        return self.create_embedding(
            embedding_model=embed_model
        )

    def generate_embedding(self, documents: List[Document]):
        """Embed ``documents`` with the store's embedding object.

        The default embedding is created first when none is set.
        NOTE(review): this calls ``embed_documents`` on the embedding
        object itself -- confirm the embedding wrappers expose that method.
        """
        if not self._embed_:
            self._embed_ = self.get_default_embedding()

        # Using the Embed Model to Generate Embeddings:
        embeddings = self._embed_.embed_documents(documents)
        return embeddings

    def as_retriever(
        self,
        metric_type: str = 'COSINE',
        index_type: str = 'IVF_FLAT',
        search_type: str = 'similarity',
        chain_type: str = 'stuff',
        search_kwargs: dict = None
    ) -> Callable:
        """Wrap the vector store in a ``VectorStoreRetriever``.

        Args:
            metric_type (str): Forwarded to ``get_vector``.
            index_type (str): Forwarded to ``get_vector``.
            search_type (str): Forwarded to the retriever.
            chain_type (str): Forwarded to the retriever.
            search_kwargs (dict): Forwarded to the retriever; ``None`` is
                replaced by an empty dictionary.

        Returns:
            Callable: The retriever.

        Raises:
            ConfigError: If ``get_vector`` returns nothing.
        """
        vector = self.get_vector(metric_type=metric_type, index_type=index_type)
        if not vector:
            raise ConfigError(
                "Vector Store is not connected. Check your connection."
            )
        return VectorStoreRetriever(
            vectorstore=vector,
            search_type=search_type,
            chain_type=chain_type,
            # The retriever declares search_kwargs as a dict, so never
            # hand it None.
            search_kwargs=search_kwargs or {}
        )
@@ -0,0 +1,188 @@
1
+ from collections.abc import Callable
2
+ from typing import Optional, Union
3
+ from uuid import uuid4
4
+ import logging
5
+ from langchain.docstore.document import Document
6
+ from langchain.memory import VectorStoreRetrieverMemory
7
+ import chromadb
8
+ from langchain_chroma import Chroma
9
+ from .abstract import AbstractStore
10
+ from ..conf import CHROMADB_HOST, CHROMADB_PORT
11
+
12
+
13
+ logging.getLogger('chromadb').setLevel(logging.INFO)
14
+
15
+
16
class ChromaStore(AbstractStore):
    """Chroma DB Store Class.

    Using Chroma as Document Vector Store.

    Args:
        embedding_model (Union[dict, str]): Forwarded to ``AbstractStore``.
        embedding (Union[dict, Callable]): Forwarded to ``AbstractStore``.
        kwargs: Forwarded to ``AbstractStore``; additionally reads
            ``database_path`` (default ``'chroma.db'``), ``ephemeral``,
            ``local``, ``host`` and ``port``.
    """

    def __init__(
        self,
        embedding_model: Union[dict, str] = None,
        embedding: Union[dict, Callable] = None,
        **kwargs
    ):
        super().__init__(
            embedding_model=embedding_model,
            embedding=embedding,
            **kwargs
        )
        self.database_path: str = kwargs.pop('database_path', 'chroma.db')
        self._ephemeral: bool = kwargs.pop('ephemeral', False)
        self._local: bool = kwargs.pop('local', False)
        self.host = kwargs.pop("host", CHROMADB_HOST)
        self.port = kwargs.pop("port", CHROMADB_PORT)
        self._collection = None

    async def connection(self):
        """Connection to ChromaDB.

        Builds an in-memory client when ``ephemeral`` is set, a persistent
        client on ``database_path`` when ``local`` is set, and an HTTP
        client on ``host``/``port`` otherwise, then gets or creates the
        store's collection.

        Returns:
            Callable: ChromaDB connection.

        """
        # Forward `database` only when one was configured: the base class
        # defaults it to '' and an empty name is not an existing database.
        db_args = {'database': self.database} if self.database else {}
        if self._ephemeral:
            self._connection = chromadb.Client()
        elif self._local:
            self._connection = chromadb.PersistentClient(
                path=self.database_path,
                **db_args
            )
        else:
            # Client-Server Connection:
            self._connection = chromadb.HttpClient(
                host=self.host,
                port=self.port,
                **db_args
            )
        self._collection = self._connection.get_or_create_collection(self.collection_name)
        self._connected = True
        return self._connection

    async def disconnect(self) -> None:
        """
        Closing the Connection on ChromaDB
        """
        self._connection = None
        self._connected = False

    def get_vector(
        self,
        collection: Union[str, None] = None,
        embedding: Optional[Callable] = None,
        **kwargs
    ) -> Chroma:
        """Build the langchain ``Chroma`` wrapper for a collection.

        Args:
            collection (str): Collection name; defaults to the store's
                ``collection_name``.
            embedding (Callable): Embedding object to use instead of the
                store's own.
            kwargs: Accepted and ignored, so the ``metric_type`` and
                ``index_type`` keywords sent by ``as_retriever`` do not
                raise ``TypeError``.

        Returns:
            Chroma: Wrapper bound to the current connection.
        """
        if not collection:
            collection = self.collection_name
        if embedding is not None:
            embed = embedding
        else:
            embed = self._embed_ or self.create_embedding(
                embedding_model=self.embedding_model
            )
        return Chroma(
            # Use the resolved name (it used to be ignored in favour of
            # self.collection_name).
            collection_name=collection,
            # Unwrap the embedding wrapper; objects without an
            # `.embedding` attribute are passed through unchanged.
            embedding_function=getattr(embed, 'embedding', embed),
            client=self._connection,
            create_collection_if_not_exists=True,
        )

    async def from_documents(self, documents: list[Document], collection: str = None, **kwargs):
        """
        Save Documents as Vectors in Chroma.

        Args:
            documents (list[Document]): Documents to store.
            collection (str): Collection name; defaults to the store's
                ``collection_name``.
            kwargs: Accepted for interface compatibility; not forwarded.

        Returns:
            Chroma: The vector store holding the documents.
        """
        vectordb = await Chroma.afrom_documents(
            documents=documents,
            embedding=self._embed_.embedding,
            collection_name=collection or self.collection_name,
            # Chroma takes the client as `client`; `connection` is not one
            # of its parameters.
            client=self._connection,
        )
        return vectordb

    async def add_texts(self, objects: list, collection: str = None):
        """
        Add Texts to ChromaDB

        Args:
            objects (list): Texts to store.
            collection (str): Collection name; defaults to the store's
                ``collection_name``.

        Returns:
            bool: Always ``True``.
        """
        if collection is None:
            collection = self.collection_name
        async with self:
            target = self._connection.get_or_create_collection(collection)
            if objects:
                # Fresh unique ids (as in add_documents): positional ids
                # "0", "1", ... collide between successive calls.
                target.add(
                    ids=[str(uuid4()) for _ in objects],
                    documents=list(objects)
                )
        return True

    async def add_documents(
        self,
        documents: list,
        collection: str = None,
        embedding: Optional[Callable] = None,
        **kwargs
    ) -> bool:
        """Add Documents to ChromaDB

        Args:
            documents (list): Documents to store.
            collection (str): Collection name; defaults to the store's
                ``collection_name``.
            embedding (Callable): Embedding object to use instead of the
                store's own.
            kwargs: Accepted for interface compatibility; not forwarded.

        Returns:
            bool: Always ``True``.
        """
        if not documents:
            # Nothing to add.
            return True
        if collection is None:
            collection = self.collection_name

        async with self:
            uuids = [str(uuid4()) for _ in documents]
            vector_db = self.get_vector(collection=collection, embedding=embedding)
            await vector_db.aadd_documents(documents=documents, ids=uuids)

        return True

    async def update_documents(
        self,
        documents: list,
        collection: str = None,
        embedding: Optional[Callable] = None,
    ) -> bool:
        """
        Update Documents to ChromaDB

        Every entry must contain an ``'id'``; it is removed from the entry
        (mutating the caller's objects) and used as the id to update.
        NOTE(review): entries are treated as mappings here but handed to
        ``Chroma.update_documents`` afterwards -- confirm the expected type.

        Args:
            documents (list): Entries to update.
            collection (str): Collection name; defaults to the store's
                ``collection_name``.
            embedding (Callable): Embedding object to use instead of the
                store's own.

        Returns:
            bool: ``True`` when the update was issued, ``False`` when the
            list is empty or an entry has no ``'id'``.
        """
        if collection is None:
            collection = self.collection_name
        async with self:
            # Pass the collection *name*: it used to be rebound to the
            # Collection object before being handed to get_vector.
            vector_db = self.get_vector(collection=collection, embedding=embedding)
            # Split the documents into ids and documents
            if documents and all('id' in doc for doc in documents):
                ids = [doc.pop('id') for doc in documents]
                vector_db.update_documents(documents=documents, ids=ids)
                return True
        return False

    async def similarity_search(
        self,
        query: str,
        collection: Union[str, None] = None,
        embedding: Optional[Callable] = None,
        limit: int = 2,
        filter: Optional[dict] = None,
    ) -> list:
        """Return the ``limit`` entries most similar to ``query``.

        Args:
            query (str): Text to search for.
            collection (str): Collection name; defaults to the store's
                ``collection_name``.
            embedding (Callable): Embedding object to use instead of the
                store's own.
            limit (int): Maximum number of results.
            filter (dict): Filter forwarded to Chroma.

        Returns:
            list: Matching documents.
        """
        if collection is None:
            collection = self.collection_name
        async with self:
            vector_db = self.get_vector(collection=collection, embedding=embedding)
            # Async variant, as DuckDBStore does, so the coroutine does not
            # block on the search.
            return await vector_db.asimilarity_search(query, k=limit, filter=filter)

    def memory_retriever(
        self,
        documents: Optional[list] = None,
        num_results: int = 5
    ) -> VectorStoreRetrieverMemory:
        """Build a retriever-backed memory from ``documents``.

        Args:
            documents (list): Initial documents (defaults to none).
            num_results (int): Number of results the retriever returns.

        Returns:
            VectorStoreRetrieverMemory: Memory over a Chroma retriever.
        """
        if not documents:
            documents = []
        vectordb = Chroma.from_documents(
            documents=documents,
            embedding=self._embed_.embedding,
            # Chroma takes the client as `client`, not `connection`.
            client=self._connection,
        )
        retriever = Chroma.as_retriever(
            vectordb,
            search_kwargs=dict(k=num_results)
        )
        return VectorStoreRetrieverMemory(retriever=retriever)
parrot/stores/duck.py ADDED
@@ -0,0 +1,162 @@
1
+ from collections.abc import Callable
2
+ from typing import Optional, Union
3
+ import duckdb
4
+ from langchain.docstore.document import Document
5
+ from langchain.memory import VectorStoreRetrieverMemory
6
+ from langchain_community.vectorstores import DuckDB
7
+ from .abstract import AbstractStore
8
+
9
+
10
class DuckDBStore(AbstractStore):
    """DuckDB Store Class.

    Using DuckDB as Document Vector Store.

    Args:
        embedding_model (Union[dict, str]): Forwarded to ``AbstractStore``.
        embedding (Union[dict, Callable]): Forwarded to ``AbstractStore``.
        kwargs: Forwarded to ``AbstractStore``; additionally reads
            ``config``, a dictionary merged over ``default_config``.
    """
    # Connection settings applied unless overridden through `config`.
    default_config: dict = {
        "enable_external_access": "false",
        "autoinstall_known_extensions": "false",
        "autoload_known_extensions": "false"
    }

    def __init__(
        self,
        embedding_model: Union[dict, str] = None,
        embedding: Union[dict, Callable] = None,
        **kwargs
    ):
        super().__init__(
            embedding_model=embedding_model,
            embedding=embedding,
            **kwargs
        )
        self.credentials = {
            "database": self.database,
        }

        config: dict = kwargs.pop("config", {})
        self.config = {
            **self.default_config,
            **config
        }

    async def connection(self, alias: str = None):
        """Connection to DuckDB.

        Args:
            alias (str): Database alias (currently unused).

        Returns:
            Callable: DuckDB connection.

        """
        self._connection = duckdb.connect(**self.credentials, config=self.config)
        self._connected = True
        return self._connection

    async def disconnect(self) -> None:
        """
        Closing the Connection on DuckDB

        Raises:
            RuntimeError: If closing the connection fails. The connection
                state is reset either way.
        """
        try:
            if self._connection:
                self._connection.close()
        except Exception as err:
            # RuntimeError takes no keyword arguments: the former
            # `message=` keyword raised TypeError and hid the real error.
            raise RuntimeError(
                f"{__name__!s}: Closing Error: {err!s}"
            ) from err
        finally:
            self._connection = None
            self._connected = False

    def get_vector(
        self,
        collection: Union[str, None] = None,
        embedding: Optional[Callable] = None,
        metadata_field: str = 'id',
        text_field: str = 'text',
        vector_key: str = 'vector',
        **kwargs
    ) -> DuckDB:
        """Build the langchain ``DuckDB`` vector store for a table.

        Args:
            collection (str): Table name; defaults to the store's
                ``collection_name``.
            embedding (Callable): Embedding object to use instead of the
                store's own.
            metadata_field (str): Passed to ``DuckDB`` as ``id_key``.
            text_field (str): Passed to ``DuckDB`` as ``text_key``.
            vector_key (str): Passed to ``DuckDB`` as ``vector_key``.
            kwargs: Accepted and ignored, so the ``metric_type`` and
                ``index_type`` keywords sent by ``as_retriever`` do not
                raise ``TypeError``.

        Returns:
            DuckDB: Vector store bound to the current connection.
        """
        if not collection:
            collection = self.collection_name
        if embedding is not None:
            embed = embedding
        else:
            # Reuse the embedding built in __init__ instead of creating a
            # new model on every call.
            embed = self._embed_ or self.create_embedding(
                embedding_model=self.embedding_model
            )
        return DuckDB(
            connection=self._connection,
            table_name=collection,
            # Hand over the wrapped embedding, as from_documents and
            # memory_retriever do; objects without `.embedding` are passed
            # through unchanged.
            embedding=getattr(embed, 'embedding', embed),
            vector_key=vector_key,
            text_key=text_field,
            id_key=metadata_field
        )

    async def add_texts(self, objects: list, collection: str = None):
        """
        Add Texts to DuckDB

        Args:
            objects (list): Texts to store.
            collection (str): Table name; defaults to the store's
                ``collection_name``.

        Returns:
            bool: Always ``True``.
        """
        async with self:
            store = self.get_vector(collection=collection)
            store.add_texts(objects)
        return True

    async def similarity_search(
        self,
        query: str,
        collection: Union[str, None] = None,
        embedding: Optional[Callable] = None,
        limit: int = 2,
    ) -> list:
        """Return the ``limit`` entries most similar to ``query``.

        Args:
            query (str): Text to search for.
            collection (str): Table name; defaults to the store's
                ``collection_name``.
            embedding (Callable): Embedding object to use instead of the
                store's own.
            limit (int): Maximum number of results.

        Returns:
            list: Matching documents.
        """
        if collection is None:
            collection = self.collection_name
        async with self:
            vector_db = self.get_vector(collection=collection, embedding=embedding)
            return await vector_db.asimilarity_search(query, k=limit)

    def memory_retriever(
        self,
        documents: Optional[list] = None,
        num_results: int = 5
    ) -> VectorStoreRetrieverMemory:
        """Build a retriever-backed memory from ``documents``.

        Args:
            documents (list): Initial documents (defaults to none).
            num_results (int): Number of results the retriever returns.

        Returns:
            VectorStoreRetrieverMemory: Memory over a DuckDB retriever.
        """
        if not documents:
            documents = []
        vectordb = DuckDB.from_documents(
            documents=documents,
            embedding=self._embed_.embedding,
            connection=self._connection,
        )
        retriever = DuckDB.as_retriever(
            vectordb,
            search_kwargs=dict(k=num_results)
        )
        return VectorStoreRetrieverMemory(retriever=retriever)

    async def from_documents(self, documents: list[Document], collection: str = None, **kwargs):
        """
        Save Documents as Vectors in DuckDB.

        Args:
            documents (list[Document]): Documents to store.
            collection (str): Table name; defaults to the store's
                ``collection_name``.
            kwargs: Accepted for interface compatibility; not forwarded.

        Returns:
            DuckDB: The vector store holding the documents.
        """
        if not collection:
            collection = self.collection_name
        vectordb = await DuckDB.afrom_documents(
            documents,
            embedding=self._embed_.embedding,
            connection=self._connection,
            # Write to the requested table: the name was resolved above but
            # never handed to DuckDB.
            # NOTE(review): get_vector reads the 'vector' column by default
            # while no vector_key is set here -- confirm both agree.
            table_name=collection,
        )
        return vectordb

    async def add_documents(self, documents: list[Document], collection: str = None, **kwargs):
        """
        Add Documents as Vectors in DuckDB.

        Args:
            documents (list[Document]): Documents to store.
            collection (str): Table name; defaults to the store's
                ``collection_name``.
            kwargs: Accepted for interface compatibility; not forwarded.

        Returns:
            The result of the vector store's ``aadd_documents``.
        """
        if not collection:
            collection = self.collection_name
        vectordb = self.get_vector(collection=collection)
        result = await vectordb.aadd_documents(
            documents=documents
        )
        return result
@@ -0,0 +1,10 @@
1
# Maps each embedding `model_type` to the name of the class implementing it.
supported_embeddings = dict(
    openai='OpenAIEmbed',
    google='GoogleEmbed',
    vertexai='VertexAIEmbed',
    huggingface='HugginfaceEmbed',
    fastembed='FastembedEmbed',
    bge='BgeEmbed',
    ollama='OllamaEmbed',
    transformers='TransformersEmbed',
)
@@ -0,0 +1,46 @@
1
+ from abc import ABC, abstractmethod
2
+ from ...conf import (
3
+ MAX_BATCH_SIZE,
4
+ EMBEDDING_DEFAULT_MODEL,
5
+ EMBEDDING_DEVICE
6
+ )
7
+
8
class AbstractEmbed(ABC):
    """A wrapper class for Create embeddings.

    Subclasses implement ``_create_embedding``; the object it returns is
    exposed through the ``embedding`` property.

    Args:
        model_name (str): The name of the model to use. When given it is
            also recorded as ``self.model_name``.
        kwargs: Forwarded to ``_create_embedding``.
    """
    model_name: str = EMBEDDING_DEFAULT_MODEL
    encode_kwargs: dict = {
        'normalize_embeddings': True,
        "batch_size": MAX_BATCH_SIZE
    }
    model_kwargs: dict = {
        'device': EMBEDDING_DEVICE,
        'trust_remote_code': True
    }

    def __init__(self, model_name: str = None, **kwargs):
        if model_name:
            # Record the requested model so `self.model_name` reports it
            # instead of always showing the class-level default.
            self.model_name = model_name
        self._embedding = self._create_embedding(model_name, **kwargs)

    @property
    def embedding(self):
        """The object returned by ``_create_embedding``."""
        return self._embedding

    def free(self):
        """
        Free the resources.

        No-op here; subclasses override it when they hold resources.
        """

    def _get_device(self):
        """Return the configured ``EMBEDDING_DEVICE``."""
        return EMBEDDING_DEVICE

    @abstractmethod
    def _create_embedding(self, model_name: str = None, **kwargs):
        """
        Create Embedding Model.

        Args:
            model_name (str): The name of the model to use.
            kwargs: Additional arguments given to the constructor.

        Returns:
            Callable: Embedding Model.
        """
@@ -0,0 +1,52 @@
1
+ from typing import Optional
2
+ import gc
3
+ import torch
4
+ from .abstract import AbstractEmbed
5
+ from ...conf import CUDA_DEFAULT_DEVICE, EMBEDDING_DEVICE
6
+
7
+
8
class BaseEmbed(AbstractEmbed):
    """A wrapper class for Base embeddings.

    Use this class for Embedding Models that require Torch/Transformers.
    """
    model_kwargs = {
        'device': EMBEDDING_DEVICE,
        'trust_remote_code': True
    }

    def _get_device(self, device_type: str = None, cuda_number: Optional[int] = None):
        """Get Default device for Torch and transformers.

        Resolution order:
            1. ``device_type`` when given.
            2. With CUDA available: CPU if ``CUDA_DEFAULT_DEVICE`` is
               ``'cpu'``, else ``cuda:<cuda_number>`` (index 0 when no
               number is given).
            3. ``mps`` when that backend is available.
            4. With ``EMBEDDING_DEVICE == 'cuda'``: ``cuda:<n>`` where
               ``n`` is ``cuda_number`` or ``CUDA_DEFAULT_DEVICE``; CPU when
               neither yields an index.
            5. ``EMBEDDING_DEVICE`` otherwise.

        Args:
            device_type (str): Explicit device string.
            cuda_number (int): Index of the CUDA device to use.

        Returns:
            torch.device: The selected device.
        """
        # torch.backends.cudnn.deterministic = True
        if device_type is not None:
            return torch.device(device_type)
        if torch.cuda.is_available():
            if CUDA_DEFAULT_DEVICE == 'cpu':
                # Use CPU even if CUDA is available
                return torch.device('cpu')
            # Use the specified CUDA GPU, or the first one by default
            gpu_index = 0 if cuda_number is None else cuda_number
            return torch.device(f'cuda:{gpu_index}')
        if torch.backends.mps.is_available():
            # Use the "mps" backend if available
            return torch.device("mps")
        if EMBEDDING_DEVICE == 'cuda':
            if cuda_number is None and CUDA_DEFAULT_DEVICE != 'cpu':
                cuda_number = CUDA_DEFAULT_DEVICE
            if cuda_number is None:
                # No index could be resolved (the default device is 'cpu'):
                # fall back to CPU instead of building the invalid device
                # string 'cuda:None'.
                return torch.device('cpu')
            return torch.device(f'cuda:{cuda_number}')
        return torch.device(EMBEDDING_DEVICE)

    def free(self):
        """
        Free the resources.

        Runs the garbage collector and empties the CUDA cache. Failures are
        logged and not propagated, since this is best-effort cleanup.
        """
        # Local import: `logging` is not among this module's visible
        # top-level imports.
        import logging
        try:
            gc.collect()  # Run Python garbage collector to free unreferenced objects
            torch.cuda.empty_cache()  # Release cached memory blocks back to the GPU
        except Exception as e:
            logging.getLogger(__name__).warning("Error: %s", e)