lfx-nightly 0.1.12.dev27__py3-none-any.whl → 0.1.12.dev28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lfx/base/agents/agent.py +19 -11
- lfx/base/agents/utils.py +18 -0
- lfx/base/data/base_file.py +28 -19
- lfx/components/data/__init__.py +0 -6
- lfx/components/data/file.py +1 -1
- lfx/components/data/mock_data.py +5 -8
- lfx/components/data/save_file.py +625 -0
- lfx/components/data/web_search.py +225 -11
- lfx/components/docling/docling_remote.py +4 -1
- lfx/components/input_output/chat.py +8 -1
- lfx/components/nvidia/nvidia.py +1 -4
- lfx/components/processing/__init__.py +3 -3
- lfx/components/processing/dataframe_to_toolset.py +259 -0
- lfx/components/processing/lambda_filter.py +3 -3
- lfx/schema/image.py +72 -19
- lfx/schema/message.py +7 -2
- lfx/services/settings/base.py +7 -0
- lfx/utils/util.py +135 -0
- {lfx_nightly-0.1.12.dev27.dist-info → lfx_nightly-0.1.12.dev28.dist-info}/METADATA +1 -1
- {lfx_nightly-0.1.12.dev27.dist-info → lfx_nightly-0.1.12.dev28.dist-info}/RECORD +22 -23
- lfx/components/data/news_search.py +0 -164
- lfx/components/data/rss.py +0 -69
- lfx/components/processing/save_file.py +0 -225
- {lfx_nightly-0.1.12.dev27.dist-info → lfx_nightly-0.1.12.dev28.dist-info}/WHEEL +0 -0
- {lfx_nightly-0.1.12.dev27.dist-info → lfx_nightly-0.1.12.dev28.dist-info}/entry_points.txt +0 -0
lfx/schema/image.py
CHANGED
|
@@ -23,6 +23,9 @@ def is_image_file(file_path) -> bool:
|
|
|
23
23
|
|
|
24
24
|
def get_file_paths(files: list[str | dict]):
|
|
25
25
|
"""Get file paths for a list of files."""
|
|
26
|
+
if not files:
|
|
27
|
+
return []
|
|
28
|
+
|
|
26
29
|
storage_service = get_storage_service()
|
|
27
30
|
if not storage_service:
|
|
28
31
|
# Extract paths from dicts if present
|
|
@@ -31,7 +34,12 @@ def get_file_paths(files: list[str | dict]):
|
|
|
31
34
|
cache_dir = Path(user_cache_dir("langflow"))
|
|
32
35
|
|
|
33
36
|
for file in files:
|
|
37
|
+
if not file: # Skip empty/None files
|
|
38
|
+
continue
|
|
39
|
+
|
|
34
40
|
file_path = file["path"] if isinstance(file, dict) and "path" in file else file
|
|
41
|
+
if not file_path: # Skip empty paths
|
|
42
|
+
continue
|
|
35
43
|
|
|
36
44
|
# If it's a relative path like "flow_id/filename", resolve it to cache dir
|
|
37
45
|
path = Path(file_path)
|
|
@@ -52,13 +60,30 @@ def get_file_paths(files: list[str | dict]):
|
|
|
52
60
|
# Handle dict case
|
|
53
61
|
if storage_service is None:
|
|
54
62
|
continue
|
|
63
|
+
|
|
64
|
+
if not file: # Skip empty/None files
|
|
65
|
+
continue
|
|
66
|
+
|
|
55
67
|
if isinstance(file, dict) and "path" in file:
|
|
56
|
-
|
|
68
|
+
file_path_str = file["path"]
|
|
57
69
|
elif hasattr(file, "path") and file.path:
|
|
58
|
-
|
|
70
|
+
file_path_str = file.path
|
|
59
71
|
else:
|
|
60
|
-
|
|
61
|
-
|
|
72
|
+
file_path_str = file
|
|
73
|
+
|
|
74
|
+
if not file_path_str: # Skip empty paths
|
|
75
|
+
continue
|
|
76
|
+
|
|
77
|
+
file_path = Path(file_path_str)
|
|
78
|
+
# Handle edge case where path might be just a filename without parent
|
|
79
|
+
if file_path.parent == Path():
|
|
80
|
+
flow_id, file_name = "", file_path.name
|
|
81
|
+
else:
|
|
82
|
+
flow_id, file_name = str(file_path.parent), file_path.name
|
|
83
|
+
|
|
84
|
+
if not file_name: # Skip if no filename
|
|
85
|
+
continue
|
|
86
|
+
|
|
62
87
|
file_paths.append(storage_service.build_full_path(flow_id=flow_id, file_name=file_name))
|
|
63
88
|
return file_paths
|
|
64
89
|
|
|
@@ -69,22 +94,31 @@ async def get_files(
|
|
|
69
94
|
convert_to_base64: bool = False,
|
|
70
95
|
):
|
|
71
96
|
"""Get files from storage service."""
|
|
97
|
+
if not file_paths:
|
|
98
|
+
return []
|
|
99
|
+
|
|
72
100
|
storage_service = get_storage_service()
|
|
73
101
|
if not storage_service:
|
|
74
102
|
# For testing purposes, read files directly when no storage service
|
|
75
103
|
file_objects: list[str | bytes] = []
|
|
76
104
|
for file_path_str in file_paths:
|
|
105
|
+
if not file_path_str: # Skip empty paths
|
|
106
|
+
continue
|
|
107
|
+
|
|
77
108
|
file_path = Path(file_path_str)
|
|
78
109
|
if file_path.exists():
|
|
79
110
|
# Use async read for compatibility
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
111
|
+
try:
|
|
112
|
+
async with aiofiles.open(file_path, "rb") as f:
|
|
113
|
+
file_content = await f.read()
|
|
114
|
+
if convert_to_base64:
|
|
115
|
+
file_base64 = base64.b64encode(file_content).decode("utf-8")
|
|
116
|
+
file_objects.append(file_base64)
|
|
117
|
+
else:
|
|
118
|
+
file_objects.append(file_content)
|
|
119
|
+
except Exception as e:
|
|
120
|
+
msg = f"Error reading file {file_path}: {e}"
|
|
121
|
+
raise FileNotFoundError(msg) from e
|
|
88
122
|
else:
|
|
89
123
|
msg = f"File not found: {file_path}"
|
|
90
124
|
raise FileNotFoundError(msg)
|
|
@@ -92,16 +126,32 @@ async def get_files(
|
|
|
92
126
|
|
|
93
127
|
file_objects: list[str | bytes] = []
|
|
94
128
|
for file in file_paths:
|
|
129
|
+
if not file: # Skip empty file paths
|
|
130
|
+
continue
|
|
131
|
+
|
|
95
132
|
file_path = Path(file)
|
|
96
|
-
|
|
133
|
+
# Handle edge case where path might be just a filename without parent
|
|
134
|
+
if file_path.parent == Path():
|
|
135
|
+
flow_id, file_name = "", file_path.name
|
|
136
|
+
else:
|
|
137
|
+
flow_id, file_name = str(file_path.parent), file_path.name
|
|
138
|
+
|
|
139
|
+
if not file_name: # Skip if no filename
|
|
140
|
+
continue
|
|
141
|
+
|
|
97
142
|
if not storage_service:
|
|
98
143
|
continue
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
144
|
+
|
|
145
|
+
try:
|
|
146
|
+
file_object = await storage_service.get_file(flow_id=flow_id, file_name=file_name)
|
|
147
|
+
if convert_to_base64:
|
|
148
|
+
file_base64 = base64.b64encode(file_object).decode("utf-8")
|
|
149
|
+
file_objects.append(file_base64)
|
|
150
|
+
else:
|
|
151
|
+
file_objects.append(file_object)
|
|
152
|
+
except Exception as e:
|
|
153
|
+
msg = f"Error getting file {file} from storage: {e}"
|
|
154
|
+
raise FileNotFoundError(msg) from e
|
|
105
155
|
return file_objects
|
|
106
156
|
|
|
107
157
|
|
|
@@ -115,6 +165,9 @@ class Image(BaseModel):
|
|
|
115
165
|
"""Convert image to base64 string."""
|
|
116
166
|
if self.path:
|
|
117
167
|
files = get_files([self.path], convert_to_base64=True)
|
|
168
|
+
if not files:
|
|
169
|
+
msg = f"No files found or file could not be converted to base64: {self.path}"
|
|
170
|
+
raise ValueError(msg)
|
|
118
171
|
return files[0]
|
|
119
172
|
msg = "Image path is not set."
|
|
120
173
|
raise ValueError(msg)
|
lfx/schema/message.py
CHANGED
|
@@ -139,7 +139,8 @@ class Message(Data):
|
|
|
139
139
|
if self.sender == MESSAGE_SENDER_USER or not self.sender:
|
|
140
140
|
if self.files:
|
|
141
141
|
contents = [{"type": "text", "text": text}]
|
|
142
|
-
|
|
142
|
+
file_contents = self.get_file_content_dicts()
|
|
143
|
+
contents.extend(file_contents)
|
|
143
144
|
human_message = HumanMessage(content=contents)
|
|
144
145
|
else:
|
|
145
146
|
human_message = HumanMessage(content=text)
|
|
@@ -198,7 +199,11 @@ class Message(Data):
|
|
|
198
199
|
# Keep this async method for backwards compatibility
|
|
199
200
|
def get_file_content_dicts(self):
|
|
200
201
|
content_dicts = []
|
|
201
|
-
|
|
202
|
+
try:
|
|
203
|
+
files = get_file_paths(self.files)
|
|
204
|
+
except Exception as e: # noqa: BLE001
|
|
205
|
+
logger.error(f"Error getting file paths: {e}")
|
|
206
|
+
return content_dicts
|
|
202
207
|
|
|
203
208
|
for file in files:
|
|
204
209
|
if isinstance(file, Image):
|
lfx/services/settings/base.py
CHANGED
|
@@ -226,6 +226,10 @@ class Settings(BaseSettings):
|
|
|
226
226
|
"""The host on which Langflow will run."""
|
|
227
227
|
port: int = 7860
|
|
228
228
|
"""The port on which Langflow will run."""
|
|
229
|
+
runtime_port: int | None = Field(default=None, exclude=True)
|
|
230
|
+
"""TEMPORARY: The port detected at runtime after checking for conflicts.
|
|
231
|
+
This field is system-managed only and will be removed in future versions
|
|
232
|
+
when strict port enforcement is implemented (errors will be raised if port unavailable)."""
|
|
229
233
|
workers: int = 1
|
|
230
234
|
"""The number of workers to run."""
|
|
231
235
|
log_level: str = "critical"
|
|
@@ -275,6 +279,9 @@ class Settings(BaseSettings):
|
|
|
275
279
|
mcp_server_enable_progress_notifications: bool = False
|
|
276
280
|
"""If set to False, Langflow will not send progress notifications in the MCP server."""
|
|
277
281
|
|
|
282
|
+
# Add projects to MCP servers automatically on creation
|
|
283
|
+
add_projects_to_mcp_servers: bool = True
|
|
284
|
+
"""If set to True, newly created projects will be added to the user's MCP servers config automatically."""
|
|
278
285
|
# MCP Composer
|
|
279
286
|
mcp_composer_enabled: bool = True
|
|
280
287
|
"""If set to False, Langflow will not start the MCP Composer service."""
|
lfx/utils/util.py
CHANGED
|
@@ -2,6 +2,7 @@ import difflib
|
|
|
2
2
|
import importlib
|
|
3
3
|
import inspect
|
|
4
4
|
import json
|
|
5
|
+
import os
|
|
5
6
|
import re
|
|
6
7
|
from functools import wraps
|
|
7
8
|
from pathlib import Path
|
|
@@ -16,6 +17,140 @@ from lfx.template.frontend_node.constants import FORCE_SHOW_FIELDS
|
|
|
16
17
|
from lfx.utils import constants
|
|
17
18
|
|
|
18
19
|
|
|
20
|
+
def detect_container_environment() -> str | None:
|
|
21
|
+
"""Detect if running in a container and return the appropriate container type.
|
|
22
|
+
|
|
23
|
+
Returns:
|
|
24
|
+
'docker' if running in Docker, 'podman' if running in Podman, None otherwise.
|
|
25
|
+
"""
|
|
26
|
+
# Check for .dockerenv file (Docker)
|
|
27
|
+
if Path("/.dockerenv").exists():
|
|
28
|
+
return "docker"
|
|
29
|
+
|
|
30
|
+
# Check cgroup for container indicators
|
|
31
|
+
try:
|
|
32
|
+
with Path("/proc/self/cgroup").open() as f:
|
|
33
|
+
content = f.read()
|
|
34
|
+
if "docker" in content:
|
|
35
|
+
return "docker"
|
|
36
|
+
if "podman" in content:
|
|
37
|
+
return "podman"
|
|
38
|
+
except (FileNotFoundError, PermissionError):
|
|
39
|
+
pass
|
|
40
|
+
|
|
41
|
+
# Check environment variables (lowercase 'container' is the standard for Podman)
|
|
42
|
+
if os.getenv("container") == "podman": # noqa: SIM112
|
|
43
|
+
return "podman"
|
|
44
|
+
|
|
45
|
+
return None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def get_container_host() -> str | None:
|
|
49
|
+
"""Get the hostname to access host services from within a container.
|
|
50
|
+
|
|
51
|
+
Tries multiple methods to find the correct hostname:
|
|
52
|
+
1. host.containers.internal (Podman) or host.docker.internal (Docker)
|
|
53
|
+
2. Gateway IP from routing table (fallback for Linux)
|
|
54
|
+
|
|
55
|
+
Returns:
|
|
56
|
+
The hostname or IP to use, or None if not in a container.
|
|
57
|
+
"""
|
|
58
|
+
import socket
|
|
59
|
+
|
|
60
|
+
# Check if we're in a container first
|
|
61
|
+
container_type = detect_container_environment()
|
|
62
|
+
if not container_type:
|
|
63
|
+
return None
|
|
64
|
+
|
|
65
|
+
# Try container-specific hostnames first based on detected type
|
|
66
|
+
if container_type == "podman":
|
|
67
|
+
# Podman: try host.containers.internal first
|
|
68
|
+
try:
|
|
69
|
+
socket.getaddrinfo("host.containers.internal", None)
|
|
70
|
+
except socket.gaierror:
|
|
71
|
+
pass
|
|
72
|
+
else:
|
|
73
|
+
return "host.containers.internal"
|
|
74
|
+
|
|
75
|
+
# Fallback to host.docker.internal (for Podman Desktop on macOS)
|
|
76
|
+
try:
|
|
77
|
+
socket.getaddrinfo("host.docker.internal", None)
|
|
78
|
+
except socket.gaierror:
|
|
79
|
+
pass
|
|
80
|
+
else:
|
|
81
|
+
return "host.docker.internal"
|
|
82
|
+
else:
|
|
83
|
+
# Docker: try host.docker.internal first
|
|
84
|
+
try:
|
|
85
|
+
socket.getaddrinfo("host.docker.internal", None)
|
|
86
|
+
except socket.gaierror:
|
|
87
|
+
pass
|
|
88
|
+
else:
|
|
89
|
+
return "host.docker.internal"
|
|
90
|
+
|
|
91
|
+
# Fallback to host.containers.internal (unlikely but possible)
|
|
92
|
+
try:
|
|
93
|
+
socket.getaddrinfo("host.containers.internal", None)
|
|
94
|
+
except socket.gaierror:
|
|
95
|
+
pass
|
|
96
|
+
else:
|
|
97
|
+
return "host.containers.internal"
|
|
98
|
+
|
|
99
|
+
# Fallback: try to get gateway IP from routing table (Linux containers)
|
|
100
|
+
try:
|
|
101
|
+
with Path("/proc/net/route").open() as f:
|
|
102
|
+
for line in f:
|
|
103
|
+
fields = line.strip().split()
|
|
104
|
+
min_field_count = 3 # Minimum fields needed: interface, destination, gateway
|
|
105
|
+
if len(fields) >= min_field_count and fields[1] == "00000000": # Default route
|
|
106
|
+
# Gateway is in hex format (little-endian)
|
|
107
|
+
gateway_hex = fields[2]
|
|
108
|
+
# Convert hex to IP address
|
|
109
|
+
# The hex is in little-endian format, so we read it backwards in pairs
|
|
110
|
+
octets = [gateway_hex[i : i + 2] for i in range(0, 8, 2)]
|
|
111
|
+
return ".".join(str(int(octet, 16)) for octet in reversed(octets))
|
|
112
|
+
except (FileNotFoundError, PermissionError, IndexError, ValueError):
|
|
113
|
+
pass
|
|
114
|
+
|
|
115
|
+
return None
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def transform_localhost_url(url: str) -> str:
|
|
119
|
+
"""Transform localhost URLs to container-accessible hosts when running in a container.
|
|
120
|
+
|
|
121
|
+
Automatically detects if running inside a container and finds the appropriate host
|
|
122
|
+
address to replace localhost/127.0.0.1. Tries in order:
|
|
123
|
+
- host.docker.internal (if resolvable)
|
|
124
|
+
- host.containers.internal (if resolvable)
|
|
125
|
+
- Gateway IP from routing table (fallback)
|
|
126
|
+
|
|
127
|
+
Args:
|
|
128
|
+
url: The original URL
|
|
129
|
+
|
|
130
|
+
Returns:
|
|
131
|
+
Transformed URL with container-accessible host if applicable, otherwise the original URL.
|
|
132
|
+
|
|
133
|
+
Example:
|
|
134
|
+
>>> transform_localhost_url("http://localhost:5001")
|
|
135
|
+
# Returns "http://host.docker.internal:5001" if running in Docker and hostname resolves
|
|
136
|
+
# Returns "http://172.17.0.1:5001" if running in Docker on Linux (gateway IP fallback)
|
|
137
|
+
# Returns "http://localhost:5001" if not in a container
|
|
138
|
+
"""
|
|
139
|
+
container_host = get_container_host()
|
|
140
|
+
|
|
141
|
+
if not container_host:
|
|
142
|
+
return url
|
|
143
|
+
|
|
144
|
+
# Replace localhost and 127.0.0.1 with the container host
|
|
145
|
+
localhost_patterns = ["localhost", "127.0.0.1"]
|
|
146
|
+
|
|
147
|
+
for pattern in localhost_patterns:
|
|
148
|
+
if pattern in url:
|
|
149
|
+
return url.replace(pattern, container_host)
|
|
150
|
+
|
|
151
|
+
return url
|
|
152
|
+
|
|
153
|
+
|
|
19
154
|
def unescape_string(s: str):
|
|
20
155
|
# Replace escaped new line characters with actual new line characters
|
|
21
156
|
return s.replace("\\n", "\n")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lfx-nightly
|
|
3
|
-
Version: 0.1.12.
|
|
3
|
+
Version: 0.1.12.dev28
|
|
4
4
|
Summary: Langflow Executor - A lightweight CLI tool for executing and serving Langflow AI flows
|
|
5
5
|
Author-email: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
|
|
6
6
|
Requires-Python: <3.14,>=3.10
|
|
@@ -7,13 +7,13 @@ lfx/type_extraction.py,sha256=eCZNl9nAQivKdaPv_9BK71N0JV9Rtr--veAht0dnQ4A,2921
|
|
|
7
7
|
lfx/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
lfx/base/constants.py,sha256=v9vo0Ifg8RxDu__XqgGzIXHlsnUFyWM-SSux0uHHoz8,1187
|
|
9
9
|
lfx/base/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
-
lfx/base/agents/agent.py,sha256
|
|
10
|
+
lfx/base/agents/agent.py,sha256=-PZ0gzBCXB4EXDHoXhYNY-tPvcOogfSNfV_eb1B1u-c,11784
|
|
11
11
|
lfx/base/agents/callback.py,sha256=mjlT9ukBMVrfjYrHsJowqpY4g9hVGBVBIYhncLWr3tQ,3692
|
|
12
12
|
lfx/base/agents/context.py,sha256=u0wboX1aRR22Ia8gY14WF12RjhE0Rxv9hPBiixT9DtQ,3916
|
|
13
13
|
lfx/base/agents/default_prompts.py,sha256=tUjfczwt4D5R1KozNOl1uSL2V2rSCZeUMS-cfV4Gwn0,955
|
|
14
14
|
lfx/base/agents/errors.py,sha256=4QY1AqSWZaOjq-iQRYH_aeCfH_hWECLQkiwybNXz66U,531
|
|
15
15
|
lfx/base/agents/events.py,sha256=1SLai0H5pvorojgBL7l_xbtJ7gpkcddd5IWyUw25UTg,14035
|
|
16
|
-
lfx/base/agents/utils.py,sha256=
|
|
16
|
+
lfx/base/agents/utils.py,sha256=OcmtZx4BTFTyq2A3rta3WoJn98UzEYdEXoRLs8-mTVo,6511
|
|
17
17
|
lfx/base/agents/crewai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
18
|
lfx/base/agents/crewai/crew.py,sha256=TN1JyLXMpJc2yPH3tokhFmxKKYoJ4lMvmG19DmpKfeY,7953
|
|
19
19
|
lfx/base/agents/crewai/tasks.py,sha256=1pBok1UDdAjLtOf2Y-rDrjRaM93no-XLy5Bf_zvWsRM,151
|
|
@@ -28,7 +28,7 @@ lfx/base/compressors/model.py,sha256=-FFBAPAy9bAgvklIo7x_uwShZR5NoMHakF6f_hNnLHg
|
|
|
28
28
|
lfx/base/curl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
29
|
lfx/base/curl/parse.py,sha256=Yw6mMbGg7e-ffrBItEUJeTiljneCXlNyt5afzEP9eUI,6094
|
|
30
30
|
lfx/base/data/__init__.py,sha256=lQsYYMyAg_jA9ZF7oc-LNZsRE2uMGT6g16WzsUByHqs,81
|
|
31
|
-
lfx/base/data/base_file.py,sha256=
|
|
31
|
+
lfx/base/data/base_file.py,sha256=TUD84Jj0tYr0DEz_D7oK9FUBkBCAIHitxryX9nXwgus,27571
|
|
32
32
|
lfx/base/data/docling_utils.py,sha256=gVDxOZghSJEo5n-UNkVGBQYqkvfNqkNkltBhAnoaJd4,13048
|
|
33
33
|
lfx/base/data/utils.py,sha256=dGqEO4zE5s_V2Cs4j0EEeyLjYLX6Zex-EGzIOznK76o,5960
|
|
34
34
|
lfx/base/document_transformers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -201,18 +201,17 @@ lfx/components/crewai/sequential_task.py,sha256=sfF7j8xcFri5JCA_AEePYRCq9ll2wnZv
|
|
|
201
201
|
lfx/components/crewai/sequential_task_agent.py,sha256=ClSD74LDE1dOIL9i0Dj0OUi3jyGAGUu3NpxZ_U3OjxU,4738
|
|
202
202
|
lfx/components/custom_component/__init__.py,sha256=3GXaQiQL8mTRYU-tpVPxtRKFkAd2zBQWAsbc-pQKJMA,928
|
|
203
203
|
lfx/components/custom_component/custom_component.py,sha256=1QpopvpXkzQ3Vg7TzFI2TszRBR5fFDcQNJYra_q_hrA,919
|
|
204
|
-
lfx/components/data/__init__.py,sha256=
|
|
204
|
+
lfx/components/data/__init__.py,sha256=g2vQndQWDTcaJrwi8EaDgW4UJoDtItwhCmDBb2cxEDw,1925
|
|
205
205
|
lfx/components/data/api_request.py,sha256=gKVtJhEzh1PdawZqQtxBwi0vU3kQ_f25jCd2tj-EeU0,20211
|
|
206
206
|
lfx/components/data/csv_to_data.py,sha256=NsNe8rZdkqscnWPynbbd3-svrRj3EEWNaJszjW9b1_k,3345
|
|
207
207
|
lfx/components/data/directory.py,sha256=iqINxxy5w60l753zraB-EDpYny8FR6vaa-AgVkdYsLk,3936
|
|
208
|
-
lfx/components/data/file.py,sha256=
|
|
208
|
+
lfx/components/data/file.py,sha256=pPlmaEHElAeUfSs60G0cl-goynWg9I3uo7IPeN63UEc,26073
|
|
209
209
|
lfx/components/data/json_to_data.py,sha256=p6MPyUgDvY9aKTL2_2cUGmkeK7baS-G9rkCvzHAnhw8,3571
|
|
210
|
-
lfx/components/data/mock_data.py,sha256=
|
|
211
|
-
lfx/components/data/
|
|
212
|
-
lfx/components/data/rss.py,sha256=RGUB2Iz07Du7p_GOm84W-i2uqDfG6DOc0IQdTKNQsjY,2487
|
|
210
|
+
lfx/components/data/mock_data.py,sha256=0h3OezKb8P5x6XWGzZJ8JmqmB9eK-bhpfLm0GO21MNY,16039
|
|
211
|
+
lfx/components/data/save_file.py,sha256=1ZPSQROFRjt1SivSuFr0jtnAMR1Bqf9DJLokYzbGyY8,25190
|
|
213
212
|
lfx/components/data/sql_executor.py,sha256=sN1lWM65O_pCfZxNAzgjtZmcTPGBLqMud2_7nFv-kpM,3726
|
|
214
213
|
lfx/components/data/url.py,sha256=zbfTeTBukw3F_mRBMIJrQYF94psEIBuS8dFjcQku5SE,11001
|
|
215
|
-
lfx/components/data/web_search.py,sha256=
|
|
214
|
+
lfx/components/data/web_search.py,sha256=48SCp-2I_Qckp5tmTVC9JBw2C-MhBDF14MJLaGjLpyQ,12758
|
|
216
215
|
lfx/components/data/webhook.py,sha256=i2jdXSLUVA0UpnYBZzdPo035MeiUcFKVJy37EhLKq6o,1643
|
|
217
216
|
lfx/components/datastax/__init__.py,sha256=VEh_Qu8dYPOVB9dISRaxheFPKxzQoNNe1DCwTWTGNIU,2415
|
|
218
217
|
lfx/components/datastax/astra_assistant_manager.py,sha256=5vLbuCxSz04GKxEpP0TNm9K_RAxxoMTz-Mt_YhkyfH0,11557
|
|
@@ -260,7 +259,7 @@ lfx/components/deepseek/deepseek.py,sha256=yNrHoljXOMScKng-oSB-ceWhVZeuh11lmrAY7
|
|
|
260
259
|
lfx/components/docling/__init__.py,sha256=O4utz9GHFpTVe_Wy0PR80yA1irJQRnAFQWkoLCVj888,1424
|
|
261
260
|
lfx/components/docling/chunk_docling_document.py,sha256=OX-jj4nX3UZgopViMAGAnFgtLql0sgs6cVmU8p9QbqA,7600
|
|
262
261
|
lfx/components/docling/docling_inline.py,sha256=12s4U860c-wkpmd2JYi6qxK1Wx_PF9j9BARLhXCL0E0,8496
|
|
263
|
-
lfx/components/docling/docling_remote.py,sha256=
|
|
262
|
+
lfx/components/docling/docling_remote.py,sha256=Ju61E93tLBq6KsRRGVA1_ySWzEOdOFj9jS9kJ7gc3H4,6980
|
|
264
263
|
lfx/components/docling/export_docling_document.py,sha256=TeFt3TesCxSqW57nv-30gf2dX8qMDUHLRhwU-1ciq08,4681
|
|
265
264
|
lfx/components/documentloaders/__init__.py,sha256=LNl2hG2InevQCUREFKhF9ylaTf_kwPsdjiDbx2ElX3M,69
|
|
266
265
|
lfx/components/duckduckgo/__init__.py,sha256=Y4zaOLVOKsD_qwF7KRLek1pcaKKHa6lGUHObuQTR9iY,104
|
|
@@ -315,7 +314,7 @@ lfx/components/ibm/watsonx_embeddings.py,sha256=_97UE-qQDCjkWfX3NFWNCti4TUXxO1LO
|
|
|
315
314
|
lfx/components/icosacomputing/__init__.py,sha256=NByWM-IMPf7N1lOeZDet8CvIa8A25kG3yKircYwS52w,120
|
|
316
315
|
lfx/components/icosacomputing/combinatorial_reasoner.py,sha256=SFVwR_8jGHVDaGO81jj2vzzeKh892h1nMGxCDljbvNY,2766
|
|
317
316
|
lfx/components/input_output/__init__.py,sha256=BaDAE9j41eSg04p5S6MJyUs4daU8UNp5e4m988K4VLQ,1291
|
|
318
|
-
lfx/components/input_output/chat.py,sha256=
|
|
317
|
+
lfx/components/input_output/chat.py,sha256=9wH2hrMl-pD0uhopsi8kWcs--o_1lCsoCSDllAKSLqc,3143
|
|
319
318
|
lfx/components/input_output/chat_output.py,sha256=lkf00vS0CYsTIdCN_ZX7DG4IL8hD2I9xQahyuL0St-w,6641
|
|
320
319
|
lfx/components/input_output/text.py,sha256=PdKOpZG5zVIoh45uzxRbY_pcycmrLaicoFhf9dauhZ0,743
|
|
321
320
|
lfx/components/input_output/text_output.py,sha256=Ij_Xk2hubdSwZoNDoltJU78YdCw91rE9kkGbY6qLViY,820
|
|
@@ -399,7 +398,7 @@ lfx/components/notdiamond/notdiamond.py,sha256=om6_UB9n5rt1T-yXxgMFBPBEP2tJtnGC2
|
|
|
399
398
|
lfx/components/novita/__init__.py,sha256=i8RrVPX00S3RupAlZ078-mdGB7VHwvpdnL7IfsWWPIo,937
|
|
400
399
|
lfx/components/novita/novita.py,sha256=IULE3StkQwECxOR3HMJsEyE7cN5hwslxovvhMmquuNo,4368
|
|
401
400
|
lfx/components/nvidia/__init__.py,sha256=Phf45VUW7An5LnauqpB-lIRVwwBiQawZkoWbqBjQnWE,1756
|
|
402
|
-
lfx/components/nvidia/nvidia.py,sha256=
|
|
401
|
+
lfx/components/nvidia/nvidia.py,sha256=Z5EomVSAzV_fqtgSKw4kO2ko59IIoXiQat8cgfpTlks,6180
|
|
403
402
|
lfx/components/nvidia/nvidia_embedding.py,sha256=D97QOAgtZEzwHvBmDDShTmZhDAyN2SRbfb71515ib-g,2658
|
|
404
403
|
lfx/components/nvidia/nvidia_ingest.py,sha256=_wxmYNmRQ2kBfAxaXLykBIlKFXVGXEsTY22spVeoCCI,12065
|
|
405
404
|
lfx/components/nvidia/nvidia_rerank.py,sha256=zzl2skHxf2oXINDZBmG8-GbkTkc6EWtyMjyV8pVRAm4,2293
|
|
@@ -421,7 +420,7 @@ lfx/components/pgvector/__init__.py,sha256=swho2zRxXeqlLBtSJD--b2XS0R3UiLPtwejql
|
|
|
421
420
|
lfx/components/pgvector/pgvector.py,sha256=UBF2B79eVfjj3hHoxrHzmT2UXOsUZxp4dWvyuPS2wh4,2635
|
|
422
421
|
lfx/components/pinecone/__init__.py,sha256=iz4GAXdbt9vo_CeWns1qyT0s7a56Q5CyS4H5MWa4Mv0,953
|
|
423
422
|
lfx/components/pinecone/pinecone.py,sha256=VkygoOmrco417hYInjYIFwuxX1M7peYJl9-jhuiySR8,5137
|
|
424
|
-
lfx/components/processing/__init__.py,sha256
|
|
423
|
+
lfx/components/processing/__init__.py,sha256=-M4X7S2PjQ2wGFbH1B1tuIs_-kCifCatHIu6pPhyV8U,5056
|
|
425
424
|
lfx/components/processing/alter_metadata.py,sha256=Cy_mLq7E8nEJd36kmCVwqjvt-4HvWcqAXwPXjdOqVps,3831
|
|
426
425
|
lfx/components/processing/batch_run.py,sha256=KZtEaQMuSEUsQ5qwiU-dJPMAqNE5LA83HoLk-Y646hg,7861
|
|
427
426
|
lfx/components/processing/combine_text.py,sha256=EP-2VD3Za5usoNj87Gtjbjh7e23_4tNpXzFo7pXpKx8,1290
|
|
@@ -430,11 +429,12 @@ lfx/components/processing/create_data.py,sha256=PjE1JQkNrXtWpk2KHow27fArlXkRCwOG
|
|
|
430
429
|
lfx/components/processing/data_operations.py,sha256=9dloD4ZEvwlpQwpV2Tig6sGwWTOxWXb9gMX6RO_hiL0,21515
|
|
431
430
|
lfx/components/processing/data_to_dataframe.py,sha256=V7n3kCjp6v6vdcsrdVJQxlgaYHqevL46x4lAcgnKNGA,2408
|
|
432
431
|
lfx/components/processing/dataframe_operations.py,sha256=tNaxm27vTkH_uVqqQ5k-c0HwVuvGAgNRzT0LCCbqmnI,11552
|
|
432
|
+
lfx/components/processing/dataframe_to_toolset.py,sha256=jnXdzOPrQnKne7P7MTiU8Oye4KUydCe6BKfkT9E7kr0,9911
|
|
433
433
|
lfx/components/processing/extract_key.py,sha256=7e0_ThUzvAe6blYuj0A8zc-b3FzYqlPJPvK4krF4voc,2012
|
|
434
434
|
lfx/components/processing/filter_data.py,sha256=BMUJNyFtTLRdmuxcyPeH_W2PfEWErH6rxMfsLSQrarw,1317
|
|
435
435
|
lfx/components/processing/filter_data_values.py,sha256=hHUiVJxnbERVbvyycmBmUrl4nDK6x7cfQThs5N9JRkk,3182
|
|
436
436
|
lfx/components/processing/json_cleaner.py,sha256=XBUJl67E0qI93yK6L_8uHmbMRaKllk1cQ2c1Dz5DdWw,3750
|
|
437
|
-
lfx/components/processing/lambda_filter.py,sha256=
|
|
437
|
+
lfx/components/processing/lambda_filter.py,sha256=QHhlDuyU-4Cm28UB0umKohkd7yp_Hlnngj-d2faHvIk,5566
|
|
438
438
|
lfx/components/processing/llm_router.py,sha256=FYC0SylbjUDlOBRLSdpFfU6Ep4IMk7tWpRAQJ5k9aA4,23198
|
|
439
439
|
lfx/components/processing/merge_data.py,sha256=ouy4E6rFi2A4_xC6T8Vr3GwFy7fhR98WBuXLGFGom7o,3569
|
|
440
440
|
lfx/components/processing/message_to_data.py,sha256=0K8SIq6vuAvQ3K7siXstNint6R1-rAuZ5NIwQiiG_n0,1342
|
|
@@ -445,7 +445,6 @@ lfx/components/processing/parser.py,sha256=vxnub-7jUTAtMgcLTdZGzHuRby_B1d1HOntsx
|
|
|
445
445
|
lfx/components/processing/prompt.py,sha256=c4LQPOQSvz1Z1e73uyOm8TaTxWDpCGcujBd-a6AxL1A,2761
|
|
446
446
|
lfx/components/processing/python_repl_core.py,sha256=6kOu64pWyBwBpTqOTM9LPnSsnTX6q_J-Hqhmoxp0wFs,3472
|
|
447
447
|
lfx/components/processing/regex.py,sha256=9n171_Ze--5gpKFJJyJlYafuEOwbPQPiyjhdLY3SUrY,2689
|
|
448
|
-
lfx/components/processing/save_file.py,sha256=UZsmG2aTYGgd6pTF1RXNhQDKVUc-IGmVsdMrREtIA6E,9559
|
|
449
448
|
lfx/components/processing/select_data.py,sha256=BRK9mM5NuHveCrMOyIXjzzpEsNMEiA7oQXvk1DZLHM4,1788
|
|
450
449
|
lfx/components/processing/split_text.py,sha256=8oZ-_aYfjxEdzFFr2reKeBVPjMrAeAauZiQkM9J7Syc,5293
|
|
451
450
|
lfx/components/processing/structured_output.py,sha256=pbGwTZ_MCRwfpNY55zKDrSCw1fFYosL11QVNkZZndz8,8125
|
|
@@ -641,10 +640,10 @@ lfx/schema/dataframe.py,sha256=Jo07krf3lI-dicAu5Y5Enf90NSEWkpD_di0rSj9krIM,7477
|
|
|
641
640
|
lfx/schema/dotdict.py,sha256=d6R5jv8V_pxaQUX3QP41ZzTz2wZpnZ0OFsylFf3xL-Q,2756
|
|
642
641
|
lfx/schema/encoders.py,sha256=7vlWHZnZuDv1UVuP9X7Xn8srP1HZqLygOmkps3EJyY0,332
|
|
643
642
|
lfx/schema/graph.py,sha256=o7qXhHZT4lEwjJZtlg4k9SNPgmMVZsZsclBbe8v_y6Y,1313
|
|
644
|
-
lfx/schema/image.py,sha256=
|
|
643
|
+
lfx/schema/image.py,sha256=WdaOT3bjkJaG28RpgmurtfcnOG7Hr2phZ27YXH25uHA,5970
|
|
645
644
|
lfx/schema/json_schema.py,sha256=keN4trNimJzqrLnPvBhq90rc5FTwQH44lLpmLv4BzhA,5316
|
|
646
645
|
lfx/schema/log.py,sha256=xbwSvJKmT1U8kxqIcV8BYgJxtu8Q6ntJKF8cIeksPEo,1943
|
|
647
|
-
lfx/schema/message.py,sha256=
|
|
646
|
+
lfx/schema/message.py,sha256=Fg83OjS67qJYCevqt_5JcBu9xXkh1WY6S6DBLAvT62g,18076
|
|
648
647
|
lfx/schema/openai_responses_schemas.py,sha256=drMCAlliefHfGRojBTMepPwk4DyEGh67naWvMPD10Sw,2596
|
|
649
648
|
lfx/schema/properties.py,sha256=ZRY6FUDfqpc5wQ-bi-ZuUUrusF9t-pt9fQa_FNPpia0,1356
|
|
650
649
|
lfx/schema/schema.py,sha256=XbIuvD64EdVljP1V32tsEL-ETXOQSFipMDaiMGzYttM,5079
|
|
@@ -675,7 +674,7 @@ lfx/services/mcp_composer/factory.py,sha256=f8Bj0ZR9A_o1c3Kw5JKyR6SbtbCEPNWOy8b0
|
|
|
675
674
|
lfx/services/mcp_composer/service.py,sha256=Binv29dXSRscUPOa40714w_NYmebZB3gwBp68KnaSFc,25765
|
|
676
675
|
lfx/services/settings/__init__.py,sha256=UISBvOQIqoA3a8opwJrTQp4PSTqpReY6GQ_7O6WuqJQ,65
|
|
677
676
|
lfx/services/settings/auth.py,sha256=_18KZipq0udCJPq-4xCD_juhqSwAEvoCqxOTSYsNv5w,5720
|
|
678
|
-
lfx/services/settings/base.py,sha256=
|
|
677
|
+
lfx/services/settings/base.py,sha256=z45EHJpkuvN9DwBuOOaShdxpeX_D0moyMAlG2E640l0,26119
|
|
679
678
|
lfx/services/settings/constants.py,sha256=ZBJolZ4kx0ZoYp2BDyHkgDFgaXEQAH-ZcLqgunv_MqQ,908
|
|
680
679
|
lfx/services/settings/factory.py,sha256=NezZ6TE_xP955B9l9pI6ONNyoylrHPfUZN8arvLVRXg,615
|
|
681
680
|
lfx/services/settings/feature_flags.py,sha256=HGuDGgfOBIDtuEiEVTgoWHxKqX2vuVBRgsqdX_4D9kg,205
|
|
@@ -715,10 +714,10 @@ lfx/utils/image.py,sha256=wMWBEI1gW3cFlQcio3mWgfHBaOw1uoAnqNmEacE_8xo,2133
|
|
|
715
714
|
lfx/utils/lazy_load.py,sha256=UDtXi8N7NT9r-FRGxsLUfDtGU_X8yqt-RQqgpc9TqAw,394
|
|
716
715
|
lfx/utils/request_utils.py,sha256=A6vmwpr7f3ZUxHg6Sz2-BdUUsyAwg84-7N_DNoPC8_Q,518
|
|
717
716
|
lfx/utils/schemas.py,sha256=NbOtVQBrn4d0BAu-0H_eCTZI2CXkKZlRY37XCSmuJwc,3865
|
|
718
|
-
lfx/utils/util.py,sha256=
|
|
717
|
+
lfx/utils/util.py,sha256=ZW7sYJBtIhM6o_GHgHoI-mXB8l2vTNgRgdUfsogFp0g,20419
|
|
719
718
|
lfx/utils/util_strings.py,sha256=nU_IcdphNaj6bAPbjeL-c1cInQPfTBit8mp5Y57lwQk,1686
|
|
720
719
|
lfx/utils/version.py,sha256=cHpbO0OJD2JQAvVaTH_6ibYeFbHJV0QDHs_YXXZ-bT8,671
|
|
721
|
-
lfx_nightly-0.1.12.
|
|
722
|
-
lfx_nightly-0.1.12.
|
|
723
|
-
lfx_nightly-0.1.12.
|
|
724
|
-
lfx_nightly-0.1.12.
|
|
720
|
+
lfx_nightly-0.1.12.dev28.dist-info/METADATA,sha256=gzrj10pfKq79hA5TOZP0FfKFsQklv-9L_XZd-OiWytk,8068
|
|
721
|
+
lfx_nightly-0.1.12.dev28.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
722
|
+
lfx_nightly-0.1.12.dev28.dist-info/entry_points.txt,sha256=1724p3RHDQRT2CKx_QRzEIa7sFuSVO0Ux70YfXfoMT4,42
|
|
723
|
+
lfx_nightly-0.1.12.dev28.dist-info/RECORD,,
|
|
@@ -1,164 +0,0 @@
|
|
|
1
|
-
from urllib.parse import quote_plus
|
|
2
|
-
|
|
3
|
-
import pandas as pd
|
|
4
|
-
import requests
|
|
5
|
-
from bs4 import BeautifulSoup
|
|
6
|
-
|
|
7
|
-
from lfx.custom import Component
|
|
8
|
-
from lfx.io import IntInput, MessageTextInput, Output
|
|
9
|
-
from lfx.schema import DataFrame
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class NewsSearchComponent(Component):
|
|
13
|
-
display_name = "News Search"
|
|
14
|
-
description = "Searches Google News via RSS. Returns clean article data."
|
|
15
|
-
documentation: str = "https://docs.langflow.org/components-data#news-search"
|
|
16
|
-
icon = "newspaper"
|
|
17
|
-
name = "NewsSearch"
|
|
18
|
-
|
|
19
|
-
inputs = [
|
|
20
|
-
MessageTextInput(
|
|
21
|
-
name="query",
|
|
22
|
-
display_name="Search Query",
|
|
23
|
-
info="Search keywords for news articles.",
|
|
24
|
-
tool_mode=True,
|
|
25
|
-
required=True,
|
|
26
|
-
),
|
|
27
|
-
MessageTextInput(
|
|
28
|
-
name="hl",
|
|
29
|
-
display_name="Language (hl)",
|
|
30
|
-
info="Language code, e.g. en-US, fr, de. Default: en-US.",
|
|
31
|
-
tool_mode=False,
|
|
32
|
-
input_types=[],
|
|
33
|
-
required=False,
|
|
34
|
-
advanced=True,
|
|
35
|
-
),
|
|
36
|
-
MessageTextInput(
|
|
37
|
-
name="gl",
|
|
38
|
-
display_name="Country (gl)",
|
|
39
|
-
info="Country code, e.g. US, FR, DE. Default: US.",
|
|
40
|
-
tool_mode=False,
|
|
41
|
-
input_types=[],
|
|
42
|
-
required=False,
|
|
43
|
-
advanced=True,
|
|
44
|
-
),
|
|
45
|
-
MessageTextInput(
|
|
46
|
-
name="ceid",
|
|
47
|
-
display_name="Country:Language (ceid)",
|
|
48
|
-
info="e.g. US:en, FR:fr. Default: US:en.",
|
|
49
|
-
tool_mode=False,
|
|
50
|
-
value="US:en",
|
|
51
|
-
input_types=[],
|
|
52
|
-
required=False,
|
|
53
|
-
advanced=True,
|
|
54
|
-
),
|
|
55
|
-
MessageTextInput(
|
|
56
|
-
name="topic",
|
|
57
|
-
display_name="Topic",
|
|
58
|
-
info="One of: WORLD, NATION, BUSINESS, TECHNOLOGY, ENTERTAINMENT, SCIENCE, SPORTS, HEALTH.",
|
|
59
|
-
tool_mode=False,
|
|
60
|
-
input_types=[],
|
|
61
|
-
required=False,
|
|
62
|
-
advanced=True,
|
|
63
|
-
),
|
|
64
|
-
MessageTextInput(
|
|
65
|
-
name="location",
|
|
66
|
-
display_name="Location (Geo)",
|
|
67
|
-
info="City, state, or country for location-based news. Leave blank for keyword search.",
|
|
68
|
-
tool_mode=False,
|
|
69
|
-
input_types=[],
|
|
70
|
-
required=False,
|
|
71
|
-
advanced=True,
|
|
72
|
-
),
|
|
73
|
-
IntInput(
|
|
74
|
-
name="timeout",
|
|
75
|
-
display_name="Timeout",
|
|
76
|
-
info="Timeout for the request in seconds.",
|
|
77
|
-
value=5,
|
|
78
|
-
required=False,
|
|
79
|
-
advanced=True,
|
|
80
|
-
),
|
|
81
|
-
]
|
|
82
|
-
|
|
83
|
-
outputs = [Output(name="articles", display_name="News Articles", method="search_news")]
|
|
84
|
-
|
|
85
|
-
def search_news(self) -> DataFrame:
|
|
86
|
-
# Defaults
|
|
87
|
-
hl = getattr(self, "hl", None) or "en-US"
|
|
88
|
-
gl = getattr(self, "gl", None) or "US"
|
|
89
|
-
ceid = getattr(self, "ceid", None) or f"{gl}:{hl.split('-')[0]}"
|
|
90
|
-
topic = getattr(self, "topic", None)
|
|
91
|
-
location = getattr(self, "location", None)
|
|
92
|
-
query = getattr(self, "query", None)
|
|
93
|
-
|
|
94
|
-
# Build base URL
|
|
95
|
-
if topic:
|
|
96
|
-
# Topic-based feed
|
|
97
|
-
base_url = f"https://news.google.com/rss/headlines/section/topic/{quote_plus(topic.upper())}"
|
|
98
|
-
params = f"?hl={hl}&gl={gl}&ceid={ceid}"
|
|
99
|
-
rss_url = base_url + params
|
|
100
|
-
elif location:
|
|
101
|
-
# Location-based feed
|
|
102
|
-
base_url = f"https://news.google.com/rss/headlines/section/geo/{quote_plus(location)}"
|
|
103
|
-
params = f"?hl={hl}&gl={gl}&ceid={ceid}"
|
|
104
|
-
rss_url = base_url + params
|
|
105
|
-
elif query:
|
|
106
|
-
# Keyword search feed
|
|
107
|
-
base_url = "https://news.google.com/rss/search?q="
|
|
108
|
-
query_parts = [query]
|
|
109
|
-
query_encoded = quote_plus(" ".join(query_parts))
|
|
110
|
-
params = f"&hl={hl}&gl={gl}&ceid={ceid}"
|
|
111
|
-
rss_url = f"{base_url}{query_encoded}{params}"
|
|
112
|
-
else:
|
|
113
|
-
self.status = "No search query, topic, or location provided."
|
|
114
|
-
self.log(self.status)
|
|
115
|
-
return DataFrame(
|
|
116
|
-
pd.DataFrame(
|
|
117
|
-
[
|
|
118
|
-
{
|
|
119
|
-
"title": "Error",
|
|
120
|
-
"link": "",
|
|
121
|
-
"published": "",
|
|
122
|
-
"summary": "No search query, topic, or location provided.",
|
|
123
|
-
}
|
|
124
|
-
]
|
|
125
|
-
)
|
|
126
|
-
)
|
|
127
|
-
|
|
128
|
-
try:
|
|
129
|
-
response = requests.get(rss_url, timeout=self.timeout)
|
|
130
|
-
response.raise_for_status()
|
|
131
|
-
soup = BeautifulSoup(response.content, "xml")
|
|
132
|
-
items = soup.find_all("item")
|
|
133
|
-
except requests.RequestException as e:
|
|
134
|
-
self.status = f"Failed to fetch news: {e}"
|
|
135
|
-
self.log(self.status)
|
|
136
|
-
return DataFrame(pd.DataFrame([{"title": "Error", "link": "", "published": "", "summary": str(e)}]))
|
|
137
|
-
except (AttributeError, ValueError, TypeError) as e:
|
|
138
|
-
self.status = f"Unexpected error: {e!s}"
|
|
139
|
-
self.log(self.status)
|
|
140
|
-
return DataFrame(pd.DataFrame([{"title": "Error", "link": "", "published": "", "summary": str(e)}]))
|
|
141
|
-
|
|
142
|
-
if not items:
|
|
143
|
-
self.status = "No news articles found."
|
|
144
|
-
self.log(self.status)
|
|
145
|
-
return DataFrame(pd.DataFrame([{"title": "No articles found", "link": "", "published": "", "summary": ""}]))
|
|
146
|
-
|
|
147
|
-
articles = []
|
|
148
|
-
for item in items:
|
|
149
|
-
try:
|
|
150
|
-
title = self.clean_html(item.title.text if item.title else "")
|
|
151
|
-
link = item.link.text if item.link else ""
|
|
152
|
-
published = item.pubDate.text if item.pubDate else ""
|
|
153
|
-
summary = self.clean_html(item.description.text if item.description else "")
|
|
154
|
-
articles.append({"title": title, "link": link, "published": published, "summary": summary})
|
|
155
|
-
except (AttributeError, ValueError, TypeError) as e:
|
|
156
|
-
self.log(f"Error parsing article: {e!s}")
|
|
157
|
-
continue
|
|
158
|
-
|
|
159
|
-
df_articles = pd.DataFrame(articles)
|
|
160
|
-
self.log(f"Found {len(df_articles)} articles.")
|
|
161
|
-
return DataFrame(df_articles)
|
|
162
|
-
|
|
163
|
-
def clean_html(self, html_string: str) -> str:
|
|
164
|
-
return BeautifulSoup(html_string, "html.parser").get_text(separator=" ", strip=True)
|