fairo 0.1__tar.gz → 25.5.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fairo might be problematic. Click here for more details.
- {fairo-0.1 → fairo-25.5.2}/PKG-INFO +1 -1
- {fairo-0.1 → fairo-25.5.2}/fairo/core/agent/base_agent.py +10 -3
- {fairo-0.1 → fairo-25.5.2}/fairo/core/execution/executor.py +5 -1
- {fairo-0.1 → fairo-25.5.2}/fairo/core/workflow/base_workflow.py +3 -2
- fairo-25.5.2/fairo/core/workflow/dependency.py +456 -0
- {fairo-0.1 → fairo-25.5.2}/fairo.egg-info/PKG-INFO +1 -1
- fairo-0.1/fairo/core/workflow/dependency.py +0 -149
- {fairo-0.1 → fairo-25.5.2}/README.md +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/__init__.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/core/__init__.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/core/agent/__init__.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/core/agent/code_analysis_agent.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/core/agent/output/__init__.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/core/agent/output/base_output.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/core/agent/output/google_drive.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/core/agent/tools/__init__.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/core/agent/tools/base_tools.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/core/agent/tools/code_analysis.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/core/agent/tools/utils.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/core/agent/utils.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/core/client/__init__.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/core/client/client.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/core/exceptions.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/core/execution/__init__.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/core/models/__init__.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/core/models/custom_field_value.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/core/models/resources.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/core/workflow/__init__.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/core/workflow/utils.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/metrics/__init__.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/metrics/fairness_object.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/metrics/metrics.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/settings.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/tests/__init__.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo/tests/test_metrics.py +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo.egg-info/SOURCES.txt +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo.egg-info/dependency_links.txt +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo.egg-info/requires.txt +0 -0
- {fairo-0.1 → fairo-25.5.2}/fairo.egg-info/top_level.txt +0 -0
- {fairo-0.1 → fairo-25.5.2}/pyproject.toml +0 -0
- {fairo-0.1 → fairo-25.5.2}/setup.cfg +0 -0
|
@@ -9,10 +9,10 @@ from typing import Dict, List, Optional, Callable, Any, Tuple
|
|
|
9
9
|
from langchain.schema import HumanMessage, AIMessage, SystemMessage
|
|
10
10
|
from langchain_core.messages import ToolMessage
|
|
11
11
|
from langchain_community.chat_models.mlflow import ChatMlflow
|
|
12
|
-
|
|
12
|
+
from langchain_core.runnables.config import RunnableConfig
|
|
13
13
|
from fairo.core.agent.output.base_output import BaseOutput
|
|
14
14
|
from fairo.core.agent.tools.base_tools import BaseTool
|
|
15
|
-
from
|
|
15
|
+
from fairo.core.agent.tools.utils import Iteration, LLMAgentOutput, ToolResult
|
|
16
16
|
from langchain_core.messages.tool import ToolCall
|
|
17
17
|
from fairo.core.client.client import BaseClient
|
|
18
18
|
from fairo.core.workflow.dependency import BaseVectorStore
|
|
@@ -41,7 +41,8 @@ class SimpleAgent:
|
|
|
41
41
|
patch_run_output_json: Callable[[LLMAgentOutput], None] = None,
|
|
42
42
|
client: BaseClient = None,
|
|
43
43
|
knowledge_stores: Optional[List[BaseVectorStore]] = None,
|
|
44
|
-
max_iterations: int = 10
|
|
44
|
+
max_iterations: int = 10,
|
|
45
|
+
workflow_run_id: str = "",
|
|
45
46
|
):
|
|
46
47
|
"""
|
|
47
48
|
Initialize the SimpleAgent with its characteristics and capabilities.
|
|
@@ -64,9 +65,11 @@ class SimpleAgent:
|
|
|
64
65
|
self.backstory = backstory
|
|
65
66
|
self.verbose = verbose
|
|
66
67
|
self.use_langchain_mlflow_chat = True if not llm else False
|
|
68
|
+
self.workflow_run_id = workflow_run_id
|
|
67
69
|
self.llm = llm or ChatMlflow(
|
|
68
70
|
target_uri=get_mlflow_gateway_uri(),
|
|
69
71
|
endpoint=get_mlflow_gateway_chat_route(),
|
|
72
|
+
extra_params={"workflow_run_id": self.workflow_run_id}
|
|
70
73
|
)
|
|
71
74
|
self.memory = memory or []
|
|
72
75
|
self.conversation_history = []
|
|
@@ -470,6 +473,10 @@ class SimpleAgent:
|
|
|
470
473
|
self.client = client
|
|
471
474
|
for tool in self.tool_instances:
|
|
472
475
|
tool.set_client(client)
|
|
476
|
+
|
|
477
|
+
def set_workflow_run_id(self, workflow_run_id: str):
|
|
478
|
+
self.set_workflow_run_id = workflow_run_id
|
|
479
|
+
self.llm.extra_params = {"workflow_run_id": workflow_run_id}
|
|
473
480
|
|
|
474
481
|
def run(self, task: str, context: Optional[str] = None, max_iterations: int = None) -> str:
|
|
475
482
|
"""
|
|
@@ -18,12 +18,14 @@ class AgentExecutor:
|
|
|
18
18
|
agents: List[Any],
|
|
19
19
|
verbose: bool = False,
|
|
20
20
|
patch_run_output_json: Callable[[LLMAgentOutput], None] = None,
|
|
21
|
-
client: BaseClient = None
|
|
21
|
+
client: BaseClient = None,
|
|
22
|
+
workflow_run_id: str = ""
|
|
22
23
|
):
|
|
23
24
|
self.agents = agents
|
|
24
25
|
self.verbose = verbose
|
|
25
26
|
self.patch_run_output_json = patch_run_output_json
|
|
26
27
|
self.client = client
|
|
28
|
+
self.workflow_run_id = workflow_run_id
|
|
27
29
|
|
|
28
30
|
# Inject shared attributes into agents
|
|
29
31
|
for agent in self.agents:
|
|
@@ -33,6 +35,8 @@ class AgentExecutor:
|
|
|
33
35
|
agent.set_client(self.client)
|
|
34
36
|
if hasattr(agent, 'verbose'):
|
|
35
37
|
agent.verbose = self.verbose
|
|
38
|
+
if hasattr(agent, 'workflow_run_id'):
|
|
39
|
+
agent.set_workflow_run_id(self.workflow_run_id)
|
|
36
40
|
|
|
37
41
|
self.pipeline = self._build_pipeline()
|
|
38
42
|
|
|
@@ -5,7 +5,7 @@ import os
|
|
|
5
5
|
from enum import Enum
|
|
6
6
|
from typing import List, Union
|
|
7
7
|
from fairo.core.agent.base_agent import SimpleAgent
|
|
8
|
-
from
|
|
8
|
+
from fairo.core.agent.tools.utils import FlowOutput, LLMAgentOutput
|
|
9
9
|
from fairo.core.client.client import BaseClient
|
|
10
10
|
|
|
11
11
|
from fairo.core.execution.executor import AgentExecutor
|
|
@@ -223,7 +223,8 @@ class BaseWorkflow:
|
|
|
223
223
|
agents=self.agents,
|
|
224
224
|
verbose=False,
|
|
225
225
|
patch_run_output_json=self.add_workflow_run_node_output,
|
|
226
|
-
client=client
|
|
226
|
+
client=client,
|
|
227
|
+
workflow_run_id=self.workflow_run_id
|
|
227
228
|
)
|
|
228
229
|
|
|
229
230
|
# Don't catch exceptions here - let them propagate up to be handled in the run method
|
|
@@ -0,0 +1,456 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
3
|
+
from langchain_aws import BedrockEmbeddings
|
|
4
|
+
from langchain_community.embeddings.mlflow import MlflowEmbeddings
|
|
5
|
+
from langchain_core.documents import Document
|
|
6
|
+
from langchain_postgres import PGVector
|
|
7
|
+
from fairo.settings import get_mlflow_gateway_embeddings_route, get_mlflow_gateway_uri
|
|
8
|
+
from fairo.core.client.client import BaseClient
|
|
9
|
+
AWS_AI_EMBEDDING_MODEL = 'cohere.embed-english-v3'
|
|
10
|
+
import requests
|
|
11
|
+
import uuid
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BaseVectorStore:
|
|
15
|
+
pass
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class PostgresVectorStore(BaseVectorStore):
|
|
19
|
+
"""
|
|
20
|
+
A PostgreSQL-based vector store using LangChain and pgvector
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
def __init__(
|
|
24
|
+
self,
|
|
25
|
+
collection_name: str,
|
|
26
|
+
embedding_model_id: str = AWS_AI_EMBEDDING_MODEL,
|
|
27
|
+
region_name: str = None,
|
|
28
|
+
collection_metadata: dict = None,
|
|
29
|
+
connection_string: str = "postgresql://postgres:postgres@localhost:5432/vectordb",
|
|
30
|
+
pre_delete_collection: bool = False,
|
|
31
|
+
default_k: int = 5
|
|
32
|
+
):
|
|
33
|
+
"""
|
|
34
|
+
Args:
|
|
35
|
+
collection_name: Name of the collection in PostgreSQL
|
|
36
|
+
embedding_model_id: Bedrock embedding model ID
|
|
37
|
+
region_name: AWS region for Bedrock
|
|
38
|
+
collection_metadata: Dict for what metadata we want to add to collection
|
|
39
|
+
connection_string: PostgreSQL connection string
|
|
40
|
+
"""
|
|
41
|
+
self.collection_name = collection_name
|
|
42
|
+
self.connection_string = connection_string
|
|
43
|
+
|
|
44
|
+
# Set up embeddings
|
|
45
|
+
self.embeddings = MlflowEmbeddings(
|
|
46
|
+
target_uri=get_mlflow_gateway_uri(),
|
|
47
|
+
endpoint=get_mlflow_gateway_embeddings_route(),
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
if collection_metadata is not None:
|
|
51
|
+
self.collection_metadata = collection_metadata
|
|
52
|
+
|
|
53
|
+
# Initialize the PGVector store
|
|
54
|
+
self.db = PGVector(
|
|
55
|
+
collection_name=collection_name,
|
|
56
|
+
connection=connection_string,
|
|
57
|
+
collection_metadata=self.collection_metadata,
|
|
58
|
+
embeddings=self.embeddings,
|
|
59
|
+
pre_delete_collection=pre_delete_collection
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
self.default_k = default_k
|
|
63
|
+
|
|
64
|
+
def add_documents(self, documents: List[Document]) -> None:
|
|
65
|
+
"""
|
|
66
|
+
Args:
|
|
67
|
+
documents: List of Document objects to add
|
|
68
|
+
"""
|
|
69
|
+
if not documents:
|
|
70
|
+
return
|
|
71
|
+
|
|
72
|
+
# Add documents to PGVector
|
|
73
|
+
self.db.add_documents(documents)
|
|
74
|
+
|
|
75
|
+
def add_texts(self, texts: List[str], metadatas: Optional[List[Dict[str, Any]]] = None) -> None:
|
|
76
|
+
"""
|
|
77
|
+
Args:
|
|
78
|
+
texts: List of text strings to add
|
|
79
|
+
metadatas: Optional list of metadata dictionaries
|
|
80
|
+
"""
|
|
81
|
+
if not texts:
|
|
82
|
+
return
|
|
83
|
+
|
|
84
|
+
# Convert to Document objects
|
|
85
|
+
documents = []
|
|
86
|
+
for i, text in enumerate(texts):
|
|
87
|
+
metadata = metadatas[i] if metadatas and i < len(metadatas) else {}
|
|
88
|
+
documents.append(Document(page_content=text, metadata=metadata))
|
|
89
|
+
|
|
90
|
+
# Add to vector store
|
|
91
|
+
self.add_documents(documents)
|
|
92
|
+
|
|
93
|
+
@staticmethod
|
|
94
|
+
def _format_query(query):
|
|
95
|
+
# Temporary fix, need to consider model / do more than truncate
|
|
96
|
+
return query[0:2048]
|
|
97
|
+
|
|
98
|
+
def similarity_search(self, query: str, k: int = None) -> List[Document]:
|
|
99
|
+
"""
|
|
100
|
+
Args:
|
|
101
|
+
query: The search query
|
|
102
|
+
k: Number of results to return
|
|
103
|
+
"""
|
|
104
|
+
formatted_query = self._format_query(query)
|
|
105
|
+
if k is None:
|
|
106
|
+
k = self.default_k
|
|
107
|
+
return self.db.similarity_search(formatted_query, k=k)
|
|
108
|
+
|
|
109
|
+
def similarity_search_with_score(self, query: str, k: int = 4) -> List[tuple[Document, float]]:
|
|
110
|
+
"""
|
|
111
|
+
Args:
|
|
112
|
+
query: The search query
|
|
113
|
+
k: Number of results to return
|
|
114
|
+
"""
|
|
115
|
+
formatted_query = self._format_query(query)
|
|
116
|
+
if k is None:
|
|
117
|
+
k = self.default_k
|
|
118
|
+
return self.db.similarity_search_with_score(formatted_query, k=k)
|
|
119
|
+
|
|
120
|
+
def delete(self) -> None:
|
|
121
|
+
"""Delete the collection from PostgreSQL."""
|
|
122
|
+
try:
|
|
123
|
+
# Use the internal PGVector method to delete a collection
|
|
124
|
+
self.db._client.delete_collection(self.collection_name)
|
|
125
|
+
except Exception as e:
|
|
126
|
+
print(f"Error deleting collection: {str(e)}")
|
|
127
|
+
|
|
128
|
+
@classmethod
|
|
129
|
+
def from_existing(cls,
|
|
130
|
+
collection_name: str,
|
|
131
|
+
embedding_model_id: str = AWS_AI_EMBEDDING_MODEL,
|
|
132
|
+
region_name: str = None,
|
|
133
|
+
connection_string: str = "postgresql://postgres:postgres@localhost:5432/vectordb"):
|
|
134
|
+
"""
|
|
135
|
+
Load an existing collection from PostgreSQL.
|
|
136
|
+
|
|
137
|
+
Args:
|
|
138
|
+
collection_name: Name of the existing collection
|
|
139
|
+
embedding_model_id: Bedrock embedding model ID
|
|
140
|
+
region_name: AWS region for Bedrock
|
|
141
|
+
connection_string: PostgreSQL connection string
|
|
142
|
+
|
|
143
|
+
Returns:
|
|
144
|
+
PostgresVectorStore instance connected to the existing collection
|
|
145
|
+
"""
|
|
146
|
+
return cls(
|
|
147
|
+
collection_name=collection_name,
|
|
148
|
+
embedding_model_id=embedding_model_id,
|
|
149
|
+
region_name=region_name,
|
|
150
|
+
connection_string=connection_string
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
class FairoVectorStore(BaseVectorStore):
|
|
154
|
+
"""
|
|
155
|
+
A vector store implementation using the Fairo API
|
|
156
|
+
"""
|
|
157
|
+
|
|
158
|
+
def __init__(
|
|
159
|
+
self,
|
|
160
|
+
collection_name: str,
|
|
161
|
+
username: str = None,
|
|
162
|
+
password: str = None,
|
|
163
|
+
api_url: str = None,
|
|
164
|
+
embedding_model_id: str = AWS_AI_EMBEDDING_MODEL,
|
|
165
|
+
region_name: str = None,
|
|
166
|
+
collection_metadata: dict = None,
|
|
167
|
+
create_if_not_exists: bool = True
|
|
168
|
+
):
|
|
169
|
+
"""
|
|
170
|
+
Initialize a Fairo vector store client
|
|
171
|
+
|
|
172
|
+
Args:
|
|
173
|
+
collection_name: Name of the collection
|
|
174
|
+
username: Fairo API username for authentication
|
|
175
|
+
password: Fairo API password for authentication
|
|
176
|
+
api_url: Fairo API base URL
|
|
177
|
+
embedding_model_id: Bedrock embedding model ID
|
|
178
|
+
region_name: AWS region for Bedrock
|
|
179
|
+
collection_metadata: Dict for metadata to add to collection
|
|
180
|
+
create_if_not_exists: Whether to create the collection if it doesn't exist
|
|
181
|
+
"""
|
|
182
|
+
self.collection_name = collection_name
|
|
183
|
+
|
|
184
|
+
# Get credentials from parameters or environment
|
|
185
|
+
self.username = username or os.environ.get("FAIRO_API_ACCESS_KEY_ID")
|
|
186
|
+
self.password = password or os.environ.get("FAIRO_API_SECRET")
|
|
187
|
+
self.api_url = api_url or os.environ.get("FAIRO_BASE_URL", "https://api.fairo.ai")
|
|
188
|
+
|
|
189
|
+
if not self.username or not self.password:
|
|
190
|
+
raise ValueError("Fairo API credentials must be provided either as parameters or in the FAIRO_USERNAME and FAIRO_PASSWORD environment variables")
|
|
191
|
+
|
|
192
|
+
# Initialize API client
|
|
193
|
+
self.client = BaseClient(
|
|
194
|
+
base_url=self.api_url.rstrip('/'),
|
|
195
|
+
username=self.username,
|
|
196
|
+
password=self.password
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
# Set up embeddings
|
|
200
|
+
self.embeddings = MlflowEmbeddings(
|
|
201
|
+
target_uri=get_mlflow_gateway_uri(),
|
|
202
|
+
endpoint=get_mlflow_gateway_embeddings_route(),
|
|
203
|
+
)
|
|
204
|
+
|
|
205
|
+
self.collection_metadata = collection_metadata or {}
|
|
206
|
+
self.collection_uuid = None
|
|
207
|
+
|
|
208
|
+
# Create or retrieve collection
|
|
209
|
+
if create_if_not_exists:
|
|
210
|
+
self._create_or_get_collection()
|
|
211
|
+
|
|
212
|
+
def _create_or_get_collection(self) -> None:
|
|
213
|
+
"""
|
|
214
|
+
Create a new collection or get an existing one by name
|
|
215
|
+
"""
|
|
216
|
+
try:
|
|
217
|
+
# First try to find if collection exists
|
|
218
|
+
collections_data = self.client.get("/collection_stores")
|
|
219
|
+
|
|
220
|
+
# Check if our collection exists
|
|
221
|
+
for collection in collections_data.get("results", []):
|
|
222
|
+
if collection.get("name") == self.collection_name:
|
|
223
|
+
self.collection_uuid = collection.get("uuid")
|
|
224
|
+
print(f"Found existing collection '{self.collection_name}' with UUID: {self.collection_uuid}")
|
|
225
|
+
return
|
|
226
|
+
|
|
227
|
+
# If collection doesn't exist, create a new one
|
|
228
|
+
if not self.collection_uuid:
|
|
229
|
+
create_data = {
|
|
230
|
+
"name": self.collection_name,
|
|
231
|
+
"description": f"Collection for {self.collection_name}",
|
|
232
|
+
"cmetadata": self.collection_metadata
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
collection_data = self.client.post("/collection_stores", json=create_data)
|
|
236
|
+
self.collection_uuid = collection_data.get("uuid")
|
|
237
|
+
print(f"Created new collection '{self.collection_name}' with UUID: {self.collection_uuid}")
|
|
238
|
+
|
|
239
|
+
except requests.exceptions.HTTPError as e:
|
|
240
|
+
raise Exception(f"Failed to create or get collection: {str(e)}")
|
|
241
|
+
|
|
242
|
+
def add_documents(self, documents: List[Document]) -> None:
|
|
243
|
+
"""
|
|
244
|
+
Add documents to the Fairo vector store
|
|
245
|
+
|
|
246
|
+
Args:
|
|
247
|
+
documents: List of Document objects to add
|
|
248
|
+
"""
|
|
249
|
+
if not documents:
|
|
250
|
+
return
|
|
251
|
+
|
|
252
|
+
if not self.collection_uuid:
|
|
253
|
+
self._create_or_get_collection()
|
|
254
|
+
|
|
255
|
+
# Convert documents to Fairo format
|
|
256
|
+
docs_data = []
|
|
257
|
+
for doc in documents:
|
|
258
|
+
# Generate embeddings for the document content
|
|
259
|
+
embedding = self.embeddings.embed_query(doc.page_content)
|
|
260
|
+
|
|
261
|
+
# Create doc entry
|
|
262
|
+
doc_entry = {
|
|
263
|
+
"page_content": doc.page_content,
|
|
264
|
+
"metadata": doc.metadata,
|
|
265
|
+
"uuid": str(uuid.uuid4()) # Generate a UUID for this document
|
|
266
|
+
}
|
|
267
|
+
docs_data.append(doc_entry)
|
|
268
|
+
|
|
269
|
+
# Send request to Fairo API
|
|
270
|
+
try:
|
|
271
|
+
payload = {"docs": docs_data}
|
|
272
|
+
self.client.post(f"/collection_stores/{self.collection_uuid}/add_documents", json=payload)
|
|
273
|
+
print(f"Successfully added {len(documents)} documents to Fairo collection")
|
|
274
|
+
|
|
275
|
+
except requests.exceptions.HTTPError as e:
|
|
276
|
+
raise Exception(f"Failed to add documents: {str(e)}")
|
|
277
|
+
|
|
278
|
+
def add_texts(self, texts: List[str], metadatas: Optional[List[Dict[str, Any]]] = None) -> None:
|
|
279
|
+
"""
|
|
280
|
+
Add texts with optional metadata to the Fairo vector store
|
|
281
|
+
|
|
282
|
+
Args:
|
|
283
|
+
texts: List of text strings to add
|
|
284
|
+
metadatas: Optional list of metadata dictionaries
|
|
285
|
+
"""
|
|
286
|
+
if not texts:
|
|
287
|
+
return
|
|
288
|
+
|
|
289
|
+
# Convert to Document objects
|
|
290
|
+
documents = []
|
|
291
|
+
for i, text in enumerate(texts):
|
|
292
|
+
metadata = metadatas[i] if metadatas and i < len(metadatas) else {}
|
|
293
|
+
documents.append(Document(page_content=text, metadata=metadata))
|
|
294
|
+
|
|
295
|
+
# Add to vector store
|
|
296
|
+
self.add_documents(documents)
|
|
297
|
+
|
|
298
|
+
def similarity_search(self, query: str, k: int = 4) -> List[Document]:
|
|
299
|
+
"""
|
|
300
|
+
Search for documents similar to the query string
|
|
301
|
+
|
|
302
|
+
Args:
|
|
303
|
+
query: The search query
|
|
304
|
+
k: Number of results to return
|
|
305
|
+
|
|
306
|
+
Returns:
|
|
307
|
+
List of Document objects
|
|
308
|
+
"""
|
|
309
|
+
# Get search results with scores
|
|
310
|
+
results_with_scores = self.similarity_search_with_score(query, k=k)
|
|
311
|
+
|
|
312
|
+
# Extract just the documents
|
|
313
|
+
return [doc for doc, _ in results_with_scores]
|
|
314
|
+
|
|
315
|
+
def similarity_search_with_score(self, query: str, k: int = 4) -> List[Tuple[Document, float]]:
|
|
316
|
+
"""
|
|
317
|
+
Search for documents similar to the query string and return scores
|
|
318
|
+
|
|
319
|
+
Args:
|
|
320
|
+
query: The search query
|
|
321
|
+
k: Number of results to return
|
|
322
|
+
|
|
323
|
+
Returns:
|
|
324
|
+
List of (Document, score) tuples
|
|
325
|
+
"""
|
|
326
|
+
if not self.collection_uuid:
|
|
327
|
+
self._create_or_get_collection()
|
|
328
|
+
|
|
329
|
+
try:
|
|
330
|
+
|
|
331
|
+
payload = {
|
|
332
|
+
"query": query,
|
|
333
|
+
}
|
|
334
|
+
if k:
|
|
335
|
+
payload["k"] = k
|
|
336
|
+
|
|
337
|
+
# Send search request
|
|
338
|
+
search_results = self.client.post(
|
|
339
|
+
f"/collection_stores/{self.collection_uuid}/similarity_search",
|
|
340
|
+
json=payload
|
|
341
|
+
)
|
|
342
|
+
|
|
343
|
+
# Process search results
|
|
344
|
+
results = []
|
|
345
|
+
|
|
346
|
+
for result in search_results:
|
|
347
|
+
# Create Document object
|
|
348
|
+
doc = Document(
|
|
349
|
+
page_content=result.get("page_content", ""),
|
|
350
|
+
metadata=result.get("metadata", {})
|
|
351
|
+
)
|
|
352
|
+
score = result.get("score", 0.0)
|
|
353
|
+
results.append((doc, score))
|
|
354
|
+
|
|
355
|
+
return results
|
|
356
|
+
|
|
357
|
+
except requests.exceptions.HTTPError as e:
|
|
358
|
+
raise Exception(f"Search failed: {str(e)}")
|
|
359
|
+
|
|
360
|
+
def get_by_id(self, document_id: str) -> Optional[Document]:
|
|
361
|
+
"""
|
|
362
|
+
Retrieve a document by its ID
|
|
363
|
+
|
|
364
|
+
Args:
|
|
365
|
+
document_id: The ID of the document to retrieve
|
|
366
|
+
|
|
367
|
+
Returns:
|
|
368
|
+
Document object if found, None otherwise
|
|
369
|
+
"""
|
|
370
|
+
if not self.collection_uuid:
|
|
371
|
+
self._create_or_get_collection()
|
|
372
|
+
|
|
373
|
+
try:
|
|
374
|
+
# In a real implementation, we would likely have a specific endpoint for this
|
|
375
|
+
# In the absence of that, we'll use a search query with a filter
|
|
376
|
+
|
|
377
|
+
# We need to create a filter condition to search by ID
|
|
378
|
+
# This implementation will depend on how Fairo's API actually handles filters
|
|
379
|
+
payload = {
|
|
380
|
+
"filter": {
|
|
381
|
+
"metadata": {
|
|
382
|
+
"id": document_id
|
|
383
|
+
}
|
|
384
|
+
},
|
|
385
|
+
"k": 1,
|
|
386
|
+
"include_text": True,
|
|
387
|
+
"include_metadata": True
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
# Send request
|
|
391
|
+
search_results = self.client.post(
|
|
392
|
+
f"/collection_stores/{self.collection_uuid}/similarity_search",
|
|
393
|
+
json=payload
|
|
394
|
+
)
|
|
395
|
+
|
|
396
|
+
# Process response
|
|
397
|
+
results = search_results.get("results", [])
|
|
398
|
+
|
|
399
|
+
if not results:
|
|
400
|
+
return None
|
|
401
|
+
|
|
402
|
+
# Create Document from the first result
|
|
403
|
+
result = results[0]
|
|
404
|
+
return Document(
|
|
405
|
+
page_content=result.get("text", ""),
|
|
406
|
+
metadata=result.get("metadata", {})
|
|
407
|
+
)
|
|
408
|
+
|
|
409
|
+
except requests.exceptions.HTTPError as e:
|
|
410
|
+
print(f"Error retrieving document by ID: {str(e)}")
|
|
411
|
+
return None
|
|
412
|
+
|
|
413
|
+
def delete(self) -> None:
|
|
414
|
+
"""Delete the collection from Fairo."""
|
|
415
|
+
if not self.collection_uuid:
|
|
416
|
+
return
|
|
417
|
+
|
|
418
|
+
try:
|
|
419
|
+
self.client.delete(f"/collection_stores/{self.collection_uuid}")
|
|
420
|
+
print(f"Collection '{self.collection_name}' deleted successfully")
|
|
421
|
+
self.collection_uuid = None
|
|
422
|
+
|
|
423
|
+
except requests.exceptions.HTTPError as e:
|
|
424
|
+
print(f"Error deleting collection: {str(e)}")
|
|
425
|
+
|
|
426
|
+
@classmethod
|
|
427
|
+
def from_existing(cls,
|
|
428
|
+
collection_name: str,
|
|
429
|
+
username: str = None,
|
|
430
|
+
password: str = None,
|
|
431
|
+
api_url: str = "https://api.fairo.ai",
|
|
432
|
+
embedding_model_id: str = AWS_AI_EMBEDDING_MODEL,
|
|
433
|
+
region_name: str = None):
|
|
434
|
+
"""
|
|
435
|
+
Load an existing collection from Fairo.
|
|
436
|
+
|
|
437
|
+
Args:
|
|
438
|
+
collection_name: Name of the existing collection
|
|
439
|
+
username: Fairo API username
|
|
440
|
+
password: Fairo API password
|
|
441
|
+
api_url: Fairo API base URL
|
|
442
|
+
embedding_model_id: Bedrock embedding model ID
|
|
443
|
+
region_name: AWS region for Bedrock
|
|
444
|
+
|
|
445
|
+
Returns:
|
|
446
|
+
FairoVectorStore instance connected to the existing collection
|
|
447
|
+
"""
|
|
448
|
+
return cls(
|
|
449
|
+
collection_name=collection_name,
|
|
450
|
+
username=username,
|
|
451
|
+
password=password,
|
|
452
|
+
api_url=api_url,
|
|
453
|
+
embedding_model_id=embedding_model_id,
|
|
454
|
+
region_name=region_name,
|
|
455
|
+
create_if_not_exists=False
|
|
456
|
+
)
|
|
@@ -1,149 +0,0 @@
|
|
|
1
|
-
from typing import Any, Dict, List, Optional
|
|
2
|
-
from langchain_aws import BedrockEmbeddings
|
|
3
|
-
from langchain_community.embeddings.mlflow import MlflowEmbeddings
|
|
4
|
-
from langchain_core.documents import Document
|
|
5
|
-
from langchain_postgres import PGVector
|
|
6
|
-
|
|
7
|
-
from fairo.settings import get_mlflow_gateway_embeddings_route, get_mlflow_gateway_uri
|
|
8
|
-
|
|
9
|
-
AWS_AI_EMBEDDING_MODEL = 'cohere.embed-english-v3'
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class BaseVectorStore:
|
|
13
|
-
pass
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class PostgresVectorStore(BaseVectorStore):
|
|
17
|
-
"""
|
|
18
|
-
A PostgreSQL-based vector store using LangChain and pgvector
|
|
19
|
-
"""
|
|
20
|
-
|
|
21
|
-
def __init__(
|
|
22
|
-
self,
|
|
23
|
-
collection_name: str,
|
|
24
|
-
embedding_model_id: str = AWS_AI_EMBEDDING_MODEL,
|
|
25
|
-
region_name: str = None,
|
|
26
|
-
collection_metadata: dict = None,
|
|
27
|
-
connection_string: str = "postgresql://postgres:postgres@localhost:5432/vectordb",
|
|
28
|
-
pre_delete_collection: bool = False,
|
|
29
|
-
default_k: int = 5
|
|
30
|
-
):
|
|
31
|
-
"""
|
|
32
|
-
Args:
|
|
33
|
-
collection_name: Name of the collection in PostgreSQL
|
|
34
|
-
embedding_model_id: Bedrock embedding model ID
|
|
35
|
-
region_name: AWS region for Bedrock
|
|
36
|
-
collection_metadata: Dict for what metadata we want to add to collection
|
|
37
|
-
connection_string: PostgreSQL connection string
|
|
38
|
-
"""
|
|
39
|
-
self.collection_name = collection_name
|
|
40
|
-
self.connection_string = connection_string
|
|
41
|
-
|
|
42
|
-
# Set up embeddings
|
|
43
|
-
self.embeddings = MlflowEmbeddings(
|
|
44
|
-
target_uri=get_mlflow_gateway_uri(),
|
|
45
|
-
endpoint=get_mlflow_gateway_embeddings_route(),
|
|
46
|
-
)
|
|
47
|
-
|
|
48
|
-
if collection_metadata is not None:
|
|
49
|
-
self.collection_metadata = collection_metadata
|
|
50
|
-
|
|
51
|
-
# Initialize the PGVector store
|
|
52
|
-
self.db = PGVector(
|
|
53
|
-
collection_name=collection_name,
|
|
54
|
-
connection=connection_string,
|
|
55
|
-
collection_metadata=self.collection_metadata,
|
|
56
|
-
embeddings=self.embeddings,
|
|
57
|
-
pre_delete_collection=pre_delete_collection
|
|
58
|
-
)
|
|
59
|
-
|
|
60
|
-
self.default_k = default_k
|
|
61
|
-
|
|
62
|
-
def add_documents(self, documents: List[Document]) -> None:
|
|
63
|
-
"""
|
|
64
|
-
Args:
|
|
65
|
-
documents: List of Document objects to add
|
|
66
|
-
"""
|
|
67
|
-
if not documents:
|
|
68
|
-
return
|
|
69
|
-
|
|
70
|
-
# Add documents to PGVector
|
|
71
|
-
self.db.add_documents(documents)
|
|
72
|
-
|
|
73
|
-
def add_texts(self, texts: List[str], metadatas: Optional[List[Dict[str, Any]]] = None) -> None:
|
|
74
|
-
"""
|
|
75
|
-
Args:
|
|
76
|
-
texts: List of text strings to add
|
|
77
|
-
metadatas: Optional list of metadata dictionaries
|
|
78
|
-
"""
|
|
79
|
-
if not texts:
|
|
80
|
-
return
|
|
81
|
-
|
|
82
|
-
# Convert to Document objects
|
|
83
|
-
documents = []
|
|
84
|
-
for i, text in enumerate(texts):
|
|
85
|
-
metadata = metadatas[i] if metadatas and i < len(metadatas) else {}
|
|
86
|
-
documents.append(Document(page_content=text, metadata=metadata))
|
|
87
|
-
|
|
88
|
-
# Add to vector store
|
|
89
|
-
self.add_documents(documents)
|
|
90
|
-
|
|
91
|
-
@staticmethod
|
|
92
|
-
def _format_query(query):
|
|
93
|
-
# Temporary fix, need to consider model / do more than truncate
|
|
94
|
-
return query[0:2048]
|
|
95
|
-
|
|
96
|
-
def similarity_search(self, query: str, k: int = None) -> List[Document]:
|
|
97
|
-
"""
|
|
98
|
-
Args:
|
|
99
|
-
query: The search query
|
|
100
|
-
k: Number of results to return
|
|
101
|
-
"""
|
|
102
|
-
formatted_query = self._format_query(query)
|
|
103
|
-
if k is None:
|
|
104
|
-
k = self.default_k
|
|
105
|
-
return self.db.similarity_search(formatted_query, k=k)
|
|
106
|
-
|
|
107
|
-
def similarity_search_with_score(self, query: str, k: int = 4) -> List[tuple[Document, float]]:
|
|
108
|
-
"""
|
|
109
|
-
Args:
|
|
110
|
-
query: The search query
|
|
111
|
-
k: Number of results to return
|
|
112
|
-
"""
|
|
113
|
-
formatted_query = self._format_query(query)
|
|
114
|
-
if k is None:
|
|
115
|
-
k = self.default_k
|
|
116
|
-
return self.db.similarity_search_with_score(formatted_query, k=k)
|
|
117
|
-
|
|
118
|
-
def delete(self) -> None:
|
|
119
|
-
"""Delete the collection from PostgreSQL."""
|
|
120
|
-
try:
|
|
121
|
-
# Use the internal PGVector method to delete a collection
|
|
122
|
-
self.db._client.delete_collection(self.collection_name)
|
|
123
|
-
except Exception as e:
|
|
124
|
-
print(f"Error deleting collection: {str(e)}")
|
|
125
|
-
|
|
126
|
-
@classmethod
|
|
127
|
-
def from_existing(cls,
|
|
128
|
-
collection_name: str,
|
|
129
|
-
embedding_model_id: str = AWS_AI_EMBEDDING_MODEL,
|
|
130
|
-
region_name: str = None,
|
|
131
|
-
connection_string: str = "postgresql://postgres:postgres@localhost:5432/vectordb"):
|
|
132
|
-
"""
|
|
133
|
-
Load an existing collection from PostgreSQL.
|
|
134
|
-
|
|
135
|
-
Args:
|
|
136
|
-
collection_name: Name of the existing collection
|
|
137
|
-
embedding_model_id: Bedrock embedding model ID
|
|
138
|
-
region_name: AWS region for Bedrock
|
|
139
|
-
connection_string: PostgreSQL connection string
|
|
140
|
-
|
|
141
|
-
Returns:
|
|
142
|
-
PostgresVectorStore instance connected to the existing collection
|
|
143
|
-
"""
|
|
144
|
-
return cls(
|
|
145
|
-
collection_name=collection_name,
|
|
146
|
-
embedding_model_id=embedding_model_id,
|
|
147
|
-
region_name=region_name,
|
|
148
|
-
connection_string=connection_string
|
|
149
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|