lfx-nightly 0.1.12.dev27__py3-none-any.whl → 0.1.12.dev28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lfx/base/agents/agent.py +19 -11
- lfx/base/agents/utils.py +18 -0
- lfx/base/data/base_file.py +28 -19
- lfx/components/data/__init__.py +0 -6
- lfx/components/data/file.py +1 -1
- lfx/components/data/mock_data.py +5 -8
- lfx/components/data/save_file.py +625 -0
- lfx/components/data/web_search.py +225 -11
- lfx/components/docling/docling_remote.py +4 -1
- lfx/components/input_output/chat.py +8 -1
- lfx/components/nvidia/nvidia.py +1 -4
- lfx/components/processing/__init__.py +3 -3
- lfx/components/processing/dataframe_to_toolset.py +259 -0
- lfx/components/processing/lambda_filter.py +3 -3
- lfx/schema/image.py +72 -19
- lfx/schema/message.py +7 -2
- lfx/services/settings/base.py +7 -0
- lfx/utils/util.py +135 -0
- {lfx_nightly-0.1.12.dev27.dist-info → lfx_nightly-0.1.12.dev28.dist-info}/METADATA +1 -1
- {lfx_nightly-0.1.12.dev27.dist-info → lfx_nightly-0.1.12.dev28.dist-info}/RECORD +22 -23
- lfx/components/data/news_search.py +0 -164
- lfx/components/data/rss.py +0 -69
- lfx/components/processing/save_file.py +0 -225
- {lfx_nightly-0.1.12.dev27.dist-info → lfx_nightly-0.1.12.dev28.dist-info}/WHEEL +0 -0
- {lfx_nightly-0.1.12.dev27.dist-info → lfx_nightly-0.1.12.dev28.dist-info}/entry_points.txt +0 -0
|
@@ -1,43 +1,133 @@
|
|
|
1
|
+
"""Unified Web Search Component.
|
|
2
|
+
|
|
3
|
+
This component consolidates Web Search, News Search, and RSS Reader into a single
|
|
4
|
+
component with tabs for different search modes.
|
|
5
|
+
"""
|
|
6
|
+
|
|
1
7
|
import re
|
|
2
|
-
from
|
|
8
|
+
from typing import Any
|
|
9
|
+
from urllib.parse import parse_qs, quote_plus, unquote, urlparse
|
|
3
10
|
|
|
4
11
|
import pandas as pd
|
|
5
12
|
import requests
|
|
6
13
|
from bs4 import BeautifulSoup
|
|
7
14
|
|
|
8
15
|
from lfx.custom import Component
|
|
9
|
-
from lfx.io import IntInput, MessageTextInput, Output
|
|
16
|
+
from lfx.io import IntInput, MessageTextInput, Output, TabInput
|
|
10
17
|
from lfx.schema import DataFrame
|
|
11
18
|
from lfx.utils.request_utils import get_user_agent
|
|
12
19
|
|
|
13
20
|
|
|
14
21
|
class WebSearchComponent(Component):
|
|
15
22
|
display_name = "Web Search"
|
|
16
|
-
description = "
|
|
23
|
+
description = "Search the web, news, or RSS feeds."
|
|
17
24
|
documentation: str = "https://docs.langflow.org/components-data#web-search"
|
|
18
25
|
icon = "search"
|
|
19
|
-
name = "
|
|
26
|
+
name = "UnifiedWebSearch"
|
|
20
27
|
|
|
21
28
|
inputs = [
|
|
29
|
+
TabInput(
|
|
30
|
+
name="search_mode",
|
|
31
|
+
display_name="Search Mode",
|
|
32
|
+
options=["Web", "News", "RSS"],
|
|
33
|
+
info="Choose search mode: Web (DuckDuckGo), News (Google News), or RSS (Feed Reader)",
|
|
34
|
+
value="Web",
|
|
35
|
+
real_time_refresh=True,
|
|
36
|
+
tool_mode=True,
|
|
37
|
+
),
|
|
22
38
|
MessageTextInput(
|
|
23
39
|
name="query",
|
|
24
40
|
display_name="Search Query",
|
|
25
|
-
info="
|
|
41
|
+
info="Search keywords for news articles.",
|
|
26
42
|
tool_mode=True,
|
|
27
43
|
required=True,
|
|
28
44
|
),
|
|
45
|
+
MessageTextInput(
|
|
46
|
+
name="hl",
|
|
47
|
+
display_name="Language (hl)",
|
|
48
|
+
info="Language code, e.g. en-US, fr, de. Default: en-US.",
|
|
49
|
+
tool_mode=False,
|
|
50
|
+
input_types=[],
|
|
51
|
+
required=False,
|
|
52
|
+
advanced=True,
|
|
53
|
+
),
|
|
54
|
+
MessageTextInput(
|
|
55
|
+
name="gl",
|
|
56
|
+
display_name="Country (gl)",
|
|
57
|
+
info="Country code, e.g. US, FR, DE. Default: US.",
|
|
58
|
+
tool_mode=False,
|
|
59
|
+
input_types=[],
|
|
60
|
+
required=False,
|
|
61
|
+
advanced=True,
|
|
62
|
+
),
|
|
63
|
+
MessageTextInput(
|
|
64
|
+
name="ceid",
|
|
65
|
+
display_name="Country:Language (ceid)",
|
|
66
|
+
info="e.g. US:en, FR:fr. Default: US:en.",
|
|
67
|
+
tool_mode=False,
|
|
68
|
+
value="US:en",
|
|
69
|
+
input_types=[],
|
|
70
|
+
required=False,
|
|
71
|
+
advanced=True,
|
|
72
|
+
),
|
|
73
|
+
MessageTextInput(
|
|
74
|
+
name="topic",
|
|
75
|
+
display_name="Topic",
|
|
76
|
+
info="One of: WORLD, NATION, BUSINESS, TECHNOLOGY, ENTERTAINMENT, SCIENCE, SPORTS, HEALTH.",
|
|
77
|
+
tool_mode=False,
|
|
78
|
+
input_types=[],
|
|
79
|
+
required=False,
|
|
80
|
+
advanced=True,
|
|
81
|
+
),
|
|
82
|
+
MessageTextInput(
|
|
83
|
+
name="location",
|
|
84
|
+
display_name="Location (Geo)",
|
|
85
|
+
info="City, state, or country for location-based news. Leave blank for keyword search.",
|
|
86
|
+
tool_mode=False,
|
|
87
|
+
input_types=[],
|
|
88
|
+
required=False,
|
|
89
|
+
advanced=True,
|
|
90
|
+
),
|
|
29
91
|
IntInput(
|
|
30
92
|
name="timeout",
|
|
31
93
|
display_name="Timeout",
|
|
32
|
-
info="Timeout for the
|
|
94
|
+
info="Timeout for the request in seconds.",
|
|
33
95
|
value=5,
|
|
96
|
+
required=False,
|
|
34
97
|
advanced=True,
|
|
35
98
|
),
|
|
36
99
|
]
|
|
37
100
|
|
|
38
|
-
outputs = [Output(name="results", display_name="
|
|
101
|
+
outputs = [Output(name="results", display_name="Results", method="perform_search")]
|
|
102
|
+
|
|
103
|
+
def __init__(self, **kwargs):
    """Create the component, handing every keyword argument to the base class unchanged."""
    # No extra state is set up here; construction is delegated entirely.
    super().__init__(**kwargs)
|
|
105
|
+
|
|
106
|
+
def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None) -> dict:
|
|
107
|
+
"""Update input visibility based on search mode."""
|
|
108
|
+
if field_name == "search_mode":
|
|
109
|
+
# Show/hide inputs based on search mode
|
|
110
|
+
is_news = field_value == "News"
|
|
111
|
+
is_rss = field_value == "RSS"
|
|
112
|
+
|
|
113
|
+
# Update query field info based on mode
|
|
114
|
+
if is_rss:
|
|
115
|
+
build_config["query"]["info"] = "RSS feed URL to parse"
|
|
116
|
+
build_config["query"]["display_name"] = "RSS Feed URL"
|
|
117
|
+
elif is_news:
|
|
118
|
+
build_config["query"]["info"] = "Search keywords for news articles."
|
|
119
|
+
build_config["query"]["display_name"] = "Search Query"
|
|
120
|
+
else: # Web
|
|
121
|
+
build_config["query"]["info"] = "Keywords to search for"
|
|
122
|
+
build_config["query"]["display_name"] = "Search Query"
|
|
123
|
+
|
|
124
|
+
# Keep news-specific fields as advanced (matching original News Search component)
|
|
125
|
+
# They remain advanced=True in all modes, just like in the original component
|
|
126
|
+
|
|
127
|
+
return build_config
|
|
39
128
|
|
|
40
129
|
def validate_url(self, string: str) -> bool:
|
|
130
|
+
"""Validate URL format."""
|
|
41
131
|
url_regex = re.compile(
|
|
42
132
|
r"^(https?:\/\/)?" r"(www\.)?" r"([a-zA-Z0-9.-]+)" r"(\.[a-zA-Z]{2,})?" r"(:\d+)?" r"(\/[^\s]*)?$",
|
|
43
133
|
re.IGNORECASE,
|
|
@@ -45,6 +135,7 @@ class WebSearchComponent(Component):
|
|
|
45
135
|
return bool(url_regex.match(string))
|
|
46
136
|
|
|
47
137
|
def ensure_url(self, url: str) -> str:
|
|
138
|
+
"""Ensure URL has proper protocol."""
|
|
48
139
|
if not url.startswith(("http://", "https://")):
|
|
49
140
|
url = "https://" + url
|
|
50
141
|
if not self.validate_url(url):
|
|
@@ -54,14 +145,19 @@ class WebSearchComponent(Component):
|
|
|
54
145
|
|
|
55
146
|
def _sanitize_query(self, query: str) -> str:
|
|
56
147
|
"""Sanitize search query."""
|
|
57
|
-
# Remove potentially dangerous characters
|
|
58
148
|
return re.sub(r'[<>"\']', "", query.strip())
|
|
59
149
|
|
|
60
|
-
def
|
|
150
|
+
def clean_html(self, html_string: str) -> str:
    """Return the visible text of ``html_string`` with all markup stripped.

    Text fragments are stripped of surrounding whitespace and joined with a
    single space.
    """
    parsed = BeautifulSoup(html_string, "html.parser")
    return parsed.get_text(separator=" ", strip=True)
|
|
153
|
+
|
|
154
|
+
def perform_web_search(self) -> DataFrame:
|
|
155
|
+
"""Perform DuckDuckGo web search."""
|
|
61
156
|
query = self._sanitize_query(self.query)
|
|
62
157
|
if not query:
|
|
63
158
|
msg = "Empty search query"
|
|
64
159
|
raise ValueError(msg)
|
|
160
|
+
|
|
65
161
|
headers = {"User-Agent": get_user_agent()}
|
|
66
162
|
params = {"q": query, "kl": "us-en"}
|
|
67
163
|
url = "https://html.duckduckgo.com/html/"
|
|
@@ -78,6 +174,7 @@ class WebSearchComponent(Component):
|
|
|
78
174
|
return DataFrame(
|
|
79
175
|
pd.DataFrame([{"title": "Error", "link": "", "snippet": "No results found", "content": ""}])
|
|
80
176
|
)
|
|
177
|
+
|
|
81
178
|
soup = BeautifulSoup(response.text, "html.parser")
|
|
82
179
|
results = []
|
|
83
180
|
|
|
@@ -108,5 +205,122 @@ class WebSearchComponent(Component):
|
|
|
108
205
|
}
|
|
109
206
|
)
|
|
110
207
|
|
|
111
|
-
|
|
112
|
-
|
|
208
|
+
return DataFrame(pd.DataFrame(results))
|
|
209
|
+
|
|
210
|
+
def perform_news_search(self) -> DataFrame:
    """Search Google News through its RSS endpoints.

    Exactly one feed is requested, chosen in priority order: ``topic``
    (section headlines), then ``location`` (geo headlines), then ``query``
    (keyword search).

    Returns:
        DataFrame: One row per article with the columns ``title``, ``link``,
        ``published`` and ``summary``. Problems (no search parameters, request
        failure, empty feed) are reported as a single placeholder row rather
        than raised, and ``self.status`` carries a short explanation.
    """
    columns = ["title", "link", "published", "summary"]

    query = getattr(self, "query", "")
    hl = getattr(self, "hl", "en-US") or "en-US"
    gl = getattr(self, "gl", "US") or "US"
    # Honour the explicit "ceid" input; derive it from gl/hl only when blank.
    # Previously the input was declared but never read.
    ceid = getattr(self, "ceid", None) or f"{gl}:{hl.split('-')[0]}"
    topic = getattr(self, "topic", None)
    location = getattr(self, "location", None)

    # Locale values are user-supplied, so encode them before they reach the
    # query string (":" is kept readable in ceid, e.g. "US:en").
    locale_params = f"hl={quote_plus(hl)}&gl={quote_plus(gl)}&ceid={quote_plus(ceid, safe=':')}"

    # Build RSS URL based on parameters
    if topic:
        # Topic-based feed
        rss_url = f"https://news.google.com/rss/headlines/section/topic/{quote_plus(topic.upper())}?{locale_params}"
    elif location:
        # Location-based feed
        rss_url = f"https://news.google.com/rss/headlines/section/geo/{quote_plus(location)}?{locale_params}"
    elif query:
        # Keyword search feed
        rss_url = f"https://news.google.com/rss/search?q={quote_plus(query)}&{locale_params}"
    else:
        self.status = "No search query, topic, or location provided."
        return DataFrame(
            pd.DataFrame(
                [{"title": "Error", "link": "", "published": "", "summary": "No search parameters provided"}]
            )
        )

    try:
        response = requests.get(rss_url, timeout=self.timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "xml")
        items = soup.find_all("item")
    except requests.RequestException as e:
        self.status = f"Failed to fetch news: {e}"
        return DataFrame(pd.DataFrame([{"title": "Error", "link": "", "published": "", "summary": str(e)}]))

    if not items:
        self.status = "No news articles found."
        return DataFrame(pd.DataFrame([{"title": "No articles found", "link": "", "published": "", "summary": ""}]))

    articles = []
    for item in items:
        try:
            title = self.clean_html(item.title.text if item.title else "")
            link = item.link.text if item.link else ""
            published = item.pubDate.text if item.pubDate else ""
            summary = self.clean_html(item.description.text if item.description else "")
        except (AttributeError, ValueError, TypeError) as e:
            # One malformed entry must not discard the rest of the feed.
            self.log(f"Error parsing article: {e!s}")
            continue
        articles.append({"title": title, "link": link, "published": published, "summary": summary})

    # Fix the column set so the frame keeps its schema even if every item
    # failed to parse (matches perform_rss_read).
    return DataFrame(pd.DataFrame(articles, columns=columns))
|
|
271
|
+
|
|
272
|
+
def perform_rss_read(self) -> DataFrame:
    """Fetch the RSS feed whose URL is stored in ``query`` and list its items.

    Returns:
        DataFrame: One row per ``<item>`` with the columns ``title``, ``link``,
        ``published`` and ``summary`` (possibly zero rows). A missing URL, a
        failed request, an empty body or an unparsable document yields a
        single ``"Error"`` row instead of an exception.
    """
    rss_url = getattr(self, "query", "")
    if not rss_url:
        return DataFrame(
            pd.DataFrame([{"title": "Error", "link": "", "published": "", "summary": "No RSS URL provided"}])
        )

    try:
        response = requests.get(rss_url, timeout=self.timeout)
        response.raise_for_status()
        if not response.content.strip():
            msg = "Empty response received"
            raise ValueError(msg)

        # Parse once: the same parse serves as the validity check and as the
        # document the items are read from. The old code parsed the body,
        # threw the result away, and parsed it again.
        try:
            soup = BeautifulSoup(response.content, "xml")
        except Exception as e:
            msg = f"Invalid XML response: {e}"
            raise ValueError(msg) from e

        items = soup.find_all("item")
    except (requests.RequestException, ValueError) as e:
        self.status = f"Failed to fetch RSS: {e}"
        return DataFrame(pd.DataFrame([{"title": "Error", "link": "", "published": "", "summary": str(e)}]))

    articles = [
        {
            "title": item.title.text if item.title else "",
            "link": item.link.text if item.link else "",
            "published": item.pubDate.text if item.pubDate else "",
            "summary": item.description.text if item.description else "",
        }
        for item in items
    ]

    # Ensure DataFrame has correct columns even if empty
    df_articles = pd.DataFrame(articles, columns=["title", "link", "published", "summary"])
    self.log(f"Fetched {len(df_articles)} articles.")
    return DataFrame(df_articles)
|
|
314
|
+
|
|
315
|
+
def perform_search(self) -> DataFrame:
    """Dispatch to the search routine that matches ``search_mode``.

    ``"News"`` runs the news search and ``"RSS"`` reads a feed. ``"Web"``, a
    missing attribute, or any unrecognised value runs the web search.
    """
    mode = getattr(self, "search_mode", "Web")

    if mode == "News":
        return self.perform_news_search()
    if mode == "RSS":
        return self.perform_rss_read()
    # "Web" and every other value share the web-search path.
    return self.perform_web_search()
|
|
@@ -12,6 +12,7 @@ from lfx.base.data import BaseFileComponent
|
|
|
12
12
|
from lfx.inputs import IntInput, NestedDictInput, StrInput
|
|
13
13
|
from lfx.inputs.inputs import FloatInput
|
|
14
14
|
from lfx.schema import Data
|
|
15
|
+
from lfx.utils.util import transform_localhost_url
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
class DoclingRemoteComponent(BaseFileComponent):
|
|
@@ -103,7 +104,9 @@ class DoclingRemoteComponent(BaseFileComponent):
|
|
|
103
104
|
]
|
|
104
105
|
|
|
105
106
|
def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:
|
|
106
|
-
|
|
107
|
+
# Transform localhost URLs to container-accessible hosts when running in a container
|
|
108
|
+
transformed_url = transform_localhost_url(self.api_url)
|
|
109
|
+
base_url = f"{transformed_url}/v1"
|
|
107
110
|
|
|
108
111
|
def _convert_document(client: httpx.Client, file_path: Path, options: dict[str, Any]) -> Data | None:
|
|
109
112
|
encoded_doc = base64.b64encode(file_path.read_bytes()).decode()
|
|
@@ -75,12 +75,19 @@ class ChatInput(ChatComponent):
|
|
|
75
75
|
]
|
|
76
76
|
|
|
77
77
|
async def message_response(self) -> Message:
|
|
78
|
+
# Ensure files is a list and filter out empty/None values
|
|
79
|
+
files = self.files if self.files else []
|
|
80
|
+
if files and not isinstance(files, list):
|
|
81
|
+
files = [files]
|
|
82
|
+
# Filter out None/empty values
|
|
83
|
+
files = [f for f in files if f is not None and f != ""]
|
|
84
|
+
|
|
78
85
|
message = await Message.create(
|
|
79
86
|
text=self.input_value,
|
|
80
87
|
sender=self.sender,
|
|
81
88
|
sender_name=self.sender_name,
|
|
82
89
|
session_id=self.session_id,
|
|
83
|
-
files=
|
|
90
|
+
files=files,
|
|
84
91
|
)
|
|
85
92
|
if self.session_id and isinstance(message, Message) and self.should_store_message:
|
|
86
93
|
stored_message = await self.send_message(
|
lfx/components/nvidia/nvidia.py
CHANGED
|
@@ -1,8 +1,5 @@
|
|
|
1
1
|
from typing import Any
|
|
2
2
|
|
|
3
|
-
from requests.exceptions import ConnectionError # noqa: A004
|
|
4
|
-
from urllib3.exceptions import MaxRetryError, NameResolutionError
|
|
5
|
-
|
|
6
3
|
from lfx.base.models.model import LCModelComponent
|
|
7
4
|
from lfx.field_typing import LanguageModel
|
|
8
5
|
from lfx.field_typing.range_spec import RangeSpec
|
|
@@ -27,7 +24,7 @@ class NVIDIAModelComponent(LCModelComponent):
|
|
|
27
24
|
except ImportError as e:
|
|
28
25
|
msg = "Please install langchain-nvidia-ai-endpoints to use the NVIDIA model."
|
|
29
26
|
raise ImportError(msg) from e
|
|
30
|
-
except
|
|
27
|
+
except Exception: # noqa: BLE001
|
|
31
28
|
logger.warning(
|
|
32
29
|
"Failed to connect to NVIDIA API. Model list may be unavailable."
|
|
33
30
|
" Please check your internet connection and API credentials."
|
|
@@ -15,6 +15,7 @@ if TYPE_CHECKING:
|
|
|
15
15
|
from lfx.components.processing.data_operations import DataOperationsComponent
|
|
16
16
|
from lfx.components.processing.data_to_dataframe import DataToDataFrameComponent
|
|
17
17
|
from lfx.components.processing.dataframe_operations import DataFrameOperationsComponent
|
|
18
|
+
from lfx.components.processing.dataframe_to_toolset import DataFrameToToolsetComponent
|
|
18
19
|
from lfx.components.processing.extract_key import ExtractDataKeyComponent
|
|
19
20
|
from lfx.components.processing.filter_data import FilterDataComponent
|
|
20
21
|
from lfx.components.processing.filter_data_values import DataFilterComponent
|
|
@@ -30,7 +31,6 @@ if TYPE_CHECKING:
|
|
|
30
31
|
from lfx.components.processing.prompt import PromptComponent
|
|
31
32
|
from lfx.components.processing.python_repl_core import PythonREPLComponent
|
|
32
33
|
from lfx.components.processing.regex import RegexExtractorComponent
|
|
33
|
-
from lfx.components.processing.save_file import SaveToFileComponent
|
|
34
34
|
from lfx.components.processing.select_data import SelectDataComponent
|
|
35
35
|
from lfx.components.processing.split_text import SplitTextComponent
|
|
36
36
|
from lfx.components.processing.structured_output import StructuredOutputComponent
|
|
@@ -45,6 +45,7 @@ _dynamic_imports = {
|
|
|
45
45
|
"DataOperationsComponent": "data_operations",
|
|
46
46
|
"DataToDataFrameComponent": "data_to_dataframe",
|
|
47
47
|
"DataFrameOperationsComponent": "dataframe_operations",
|
|
48
|
+
"DataFrameToToolsetComponent": "dataframe_to_toolset",
|
|
48
49
|
"ExtractDataKeyComponent": "extract_key",
|
|
49
50
|
"FilterDataComponent": "filter_data",
|
|
50
51
|
"DataFilterComponent": "filter_data_values",
|
|
@@ -60,7 +61,6 @@ _dynamic_imports = {
|
|
|
60
61
|
"PromptComponent": "prompt",
|
|
61
62
|
"PythonREPLComponent": "python_repl_core",
|
|
62
63
|
"RegexExtractorComponent": "regex",
|
|
63
|
-
"SaveToFileComponent": "save_file",
|
|
64
64
|
"SelectDataComponent": "select_data",
|
|
65
65
|
"SplitTextComponent": "split_text",
|
|
66
66
|
"StructuredOutputComponent": "structured_output",
|
|
@@ -74,6 +74,7 @@ __all__ = [
|
|
|
74
74
|
"CreateDataComponent",
|
|
75
75
|
"DataFilterComponent",
|
|
76
76
|
"DataFrameOperationsComponent",
|
|
77
|
+
"DataFrameToToolsetComponent",
|
|
77
78
|
"DataOperationsComponent",
|
|
78
79
|
"DataToDataFrameComponent",
|
|
79
80
|
"ExtractDataKeyComponent",
|
|
@@ -90,7 +91,6 @@ __all__ = [
|
|
|
90
91
|
"PromptComponent",
|
|
91
92
|
"PythonREPLComponent",
|
|
92
93
|
"RegexExtractorComponent",
|
|
93
|
-
"SaveToFileComponent",
|
|
94
94
|
"SelectDataComponent",
|
|
95
95
|
"SplitTextComponent",
|
|
96
96
|
"StructuredOutputComponent",
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
"""DataFrame to Toolset Component.
|
|
2
|
+
|
|
3
|
+
This component converts each row of a DataFrame into a callable tool/action within a toolset.
|
|
4
|
+
Each row becomes a tool where the action name comes from one column and the content/response
|
|
5
|
+
comes from another column.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
from typing import TYPE_CHECKING
|
|
12
|
+
|
|
13
|
+
from langchain.tools import StructuredTool
|
|
14
|
+
from pydantic import BaseModel, create_model
|
|
15
|
+
|
|
16
|
+
from lfx.base.langchain_utilities.model import LCToolComponent
|
|
17
|
+
from lfx.field_typing.constants import Tool
|
|
18
|
+
from lfx.io import HandleInput, Output, StrInput
|
|
19
|
+
from lfx.schema.data import Data
|
|
20
|
+
from lfx.schema.dataframe import DataFrame
|
|
21
|
+
from lfx.schema.message import Message
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from lfx.field_typing.constants import Tool
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class DataFrameToToolsetComponent(LCToolComponent):
    """Component that converts DataFrame rows into a toolset with multiple callable actions.

    Each usable row yields one ``StructuredTool``: its name is the sanitized
    value of ``tool_name_column`` and calling it returns the row's
    ``tool_output_column`` text unchanged.
    """

    display_name = "DataFrame to Toolset"
    description = "Convert each row of a DataFrame into a callable tool/action in a toolset."
    icon = "wrench"
    name = "DataFrameToToolset"

    inputs = [
        HandleInput(
            name="dataframe",
            display_name="DataFrame",
            input_types=["DataFrame"],
            info="DataFrame where each row will become a tool/action",
            required=True,
        ),
        StrInput(
            name="tool_name_column",
            display_name="Tool Name Column",
            info="Column with tool names",
            required=True,
        ),
        StrInput(
            name="tool_output_column",
            display_name="Tool Output Column",
            info="Column with tool outputs/responses",
            required=True,
        ),
    ]

    outputs = [
        Output(display_name="Tools", name="tools", method="build_tools"),
        Output(display_name="Message", name="message", method="get_message"),
    ]

    def __init__(self, **kwargs):
        """Initialise the component with an empty tool cache and action map."""
        super().__init__(**kwargs)
        self._tools_cache: list[Tool] = []
        # Maps sanitized tool name -> {"original_name", "content", "sanitized_name"}.
        self._action_data: dict[str, dict[str, str]] = {}

    @staticmethod
    def _is_missing(value) -> bool:
        """Return True when a cell holds a null marker (None, NaN, NA, NaT)."""
        import pandas as pd  # local import: this module does not import pandas at file level

        # Only scalars can be null markers; pd.isna on a list-like cell would
        # return an array instead of a bool.
        return bool(pd.api.types.is_scalar(value) and pd.isna(value))

    def _sanitize_tool_name(self, name: str) -> str:
        """Sanitize tool name to match required format '^[a-zA-Z0-9_-]+$'.

        Disallowed characters become underscores; a result that does not start
        with a letter or underscore is prefixed with ``tool_``; an empty result
        becomes ``unnamed_tool``.
        """
        sanitized = re.sub(r"[^a-zA-Z0-9_-]", "_", str(name))
        if sanitized and not sanitized[0].isalpha() and sanitized[0] != "_":
            sanitized = f"tool_{sanitized}"
        return sanitized or "unnamed_tool"

    def _prepare_action_data(self) -> None:
        """Rebuild ``self._action_data`` from the connected DataFrame.

        Rows whose name or output cell is null or blank are skipped.

        Raises:
            ValueError: If a configured column name is not present in the DataFrame.
        """
        # Always start clean so that a failure below cannot leave stale
        # actions from an earlier run behind.
        self._action_data = {}

        dataframe = getattr(self, "dataframe", None)
        if not isinstance(dataframe, DataFrame) or not hasattr(dataframe, "columns"):
            return

        # Check if column names are provided
        if not self.tool_name_column or not self.tool_output_column:
            return

        for label, column in (("Tool name", self.tool_name_column), ("Tool output", self.tool_output_column)):
            if column not in dataframe.columns:
                msg = f"{label} column '{column}' not found in DataFrame columns: {list(dataframe.columns)}"
                raise ValueError(msg)

        # Process each row to create action mappings
        for _, row in dataframe.iterrows():
            raw_name = row[self.tool_name_column]
            raw_content = row[self.tool_output_column]

            # str(NaN) is "nan" and str(None) is "None" - both truthy - so null
            # cells must be rejected before stringifying or they become tools.
            if self._is_missing(raw_name) or self._is_missing(raw_content):
                continue

            action_name = str(raw_name).strip()
            content = str(raw_content).strip()
            if not action_name or not content:
                continue

            sanitized_name = self._sanitize_tool_name(action_name)
            if sanitized_name in self._action_data:
                # Last row wins (unchanged behaviour); make the overwrite visible.
                self.log(f"Duplicate tool name '{sanitized_name}' (from '{action_name}'); replacing earlier row.")
            self._action_data[sanitized_name] = {
                "original_name": action_name,
                "content": content,
                "sanitized_name": sanitized_name,
            }

    def _create_action_function(self, action_name: str, content: str):
        """Create a function for a specific action that returns the content."""

        def action_function(**kwargs) -> str:
            # Arguments are only logged for now; the stored content is returned as-is.
            self.log(kwargs)  # TODO: Coming soon: implement arguments to modify content
            return content

        action_function.__name__ = f"execute_{action_name}"
        action_function.__doc__ = f"Execute {action_name} action and return the associated content."
        return action_function

    def build_tools(self) -> list[Tool]:
        """Build the toolset from DataFrame data.

        Returns:
            One ``StructuredTool`` per prepared action, or an empty list when
            no DataFrame is connected or no row is usable.
        """
        # Handle case where inputs are not ready
        if getattr(self, "dataframe", None) is None:
            return []

        self._prepare_action_data()
        if not self._action_data:
            return []

        tools_description_preview_length = 100
        tools_description_content_length = 200

        tools = []
        for sanitized_name, action_info in self._action_data.items():
            original_name = action_info["original_name"]
            content = action_info["content"]

            # The tools take no parameters, so each gets an empty argument schema.
            tool_schema = create_model(f"{sanitized_name}Schema", __base__=BaseModel)
            tool_function = self._create_action_function(sanitized_name, content)
            ellipsis = "..." if len(content) > tools_description_preview_length else ""

            tools.append(
                StructuredTool(
                    name=sanitized_name,
                    description=(
                        f"Execute {original_name} action. Returns: "
                        f"{content[:tools_description_preview_length]}{ellipsis}"
                    ),
                    func=tool_function,
                    args_schema=tool_schema,
                    handle_tool_error=True,
                    tags=[sanitized_name],
                    metadata={
                        "display_name": original_name,
                        "display_description": f"Action: {original_name}",
                        "original_name": original_name,
                        "content_preview": content[:tools_description_content_length],
                    },
                )
            )

        self._tools_cache = tools
        return tools

    def build_tool(self) -> Tool:
        """Build a single tool (for compatibility with LCToolComponent).

        Returns the first tool of the toolset, or a placeholder tool when the
        toolset is empty.
        """
        tools = self.build_tools()
        if tools:
            return tools[0]

        # Return a placeholder tool when no data is available
        def placeholder_function(**kwargs) -> str:
            self.log(kwargs)  # TODO: Coming soon: implement arguments to modify content
            return "No tools available. Please connect a DataFrame with appropriate columns."

        return StructuredTool(
            name="placeholder_tool",
            description="Placeholder tool - waiting for DataFrame input",
            func=placeholder_function,
            args_schema=create_model("PlaceholderSchema", __base__=BaseModel),
        )

    def get_message(self) -> Message:
        """Get a message describing the created toolset."""
        # Handle case where inputs are not ready
        if getattr(self, "dataframe", None) is None:
            return Message(text="Waiting for DataFrame input...")

        self._prepare_action_data()
        if not self._action_data:
            return Message(text="No tools were created. Please check your DataFrame and column selections.")

        lines = [f"Created toolset with {len(self._action_data)} tools:\n"]
        lines.extend(
            f"{index}. {info['original_name']}\n" for index, info in enumerate(self._action_data.values(), 1)
        )
        return Message(text="".join(lines))

    def run_model(self) -> list[Data]:
        """Run the model and return tool information as Data objects."""
        # Handle case where inputs are not ready
        if getattr(self, "dataframe", None) is None:
            return [Data(data={"status": "Waiting for DataFrame input"})]

        tools = self.build_tools()
        if not tools:
            return [Data(data={"status": "No tools created. Check DataFrame and column selections."})]

        results = []
        for tool in tools:
            # metadata may be absent or None on a tool; treat both as "no metadata".
            metadata = getattr(tool, "metadata", None) or {}
            results.append(
                Data(
                    data={
                        "tool_name": tool.name,
                        "display_name": metadata.get("display_name", tool.name),
                        "description": tool.description,
                        "original_name": metadata.get("original_name", ""),
                        "content_preview": metadata.get("content_preview", ""),
                    }
                )
            )
        return results
|
|
@@ -14,11 +14,11 @@ if TYPE_CHECKING:
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class LambdaFilterComponent(Component):
|
|
17
|
-
display_name = "Smart
|
|
17
|
+
display_name = "Smart Transform"
|
|
18
18
|
description = "Uses an LLM to generate a function for filtering or transforming structured data."
|
|
19
|
-
documentation: str = "https://docs.langflow.org/components-processing#smart-
|
|
19
|
+
documentation: str = "https://docs.langflow.org/components-processing#smart-transform"
|
|
20
20
|
icon = "square-function"
|
|
21
|
-
name = "Smart
|
|
21
|
+
name = "Smart Transform"
|
|
22
22
|
|
|
23
23
|
inputs = [
|
|
24
24
|
DataInput(
|