lfx-nightly 0.2.0.dev0__py3-none-any.whl → 0.2.0.dev41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lfx/_assets/component_index.json +1 -1
- lfx/base/agents/agent.py +21 -4
- lfx/base/agents/altk_base_agent.py +393 -0
- lfx/base/agents/altk_tool_wrappers.py +565 -0
- lfx/base/agents/events.py +2 -1
- lfx/base/composio/composio_base.py +159 -224
- lfx/base/data/base_file.py +97 -20
- lfx/base/data/docling_utils.py +61 -10
- lfx/base/data/storage_utils.py +301 -0
- lfx/base/data/utils.py +178 -14
- lfx/base/mcp/util.py +2 -2
- lfx/base/models/anthropic_constants.py +21 -12
- lfx/base/models/groq_constants.py +74 -58
- lfx/base/models/groq_model_discovery.py +265 -0
- lfx/base/models/model.py +1 -1
- lfx/base/models/model_utils.py +100 -0
- lfx/base/models/openai_constants.py +7 -0
- lfx/base/models/watsonx_constants.py +32 -8
- lfx/base/tools/run_flow.py +601 -129
- lfx/cli/commands.py +9 -4
- lfx/cli/common.py +2 -2
- lfx/cli/run.py +1 -1
- lfx/cli/script_loader.py +53 -11
- lfx/components/Notion/create_page.py +1 -1
- lfx/components/Notion/list_database_properties.py +1 -1
- lfx/components/Notion/list_pages.py +1 -1
- lfx/components/Notion/list_users.py +1 -1
- lfx/components/Notion/page_content_viewer.py +1 -1
- lfx/components/Notion/search.py +1 -1
- lfx/components/Notion/update_page_property.py +1 -1
- lfx/components/__init__.py +19 -5
- lfx/components/{agents → altk}/__init__.py +5 -9
- lfx/components/altk/altk_agent.py +193 -0
- lfx/components/apify/apify_actor.py +1 -1
- lfx/components/composio/__init__.py +70 -18
- lfx/components/composio/apollo_composio.py +11 -0
- lfx/components/composio/bitbucket_composio.py +11 -0
- lfx/components/composio/canva_composio.py +11 -0
- lfx/components/composio/coda_composio.py +11 -0
- lfx/components/composio/composio_api.py +10 -0
- lfx/components/composio/discord_composio.py +1 -1
- lfx/components/composio/elevenlabs_composio.py +11 -0
- lfx/components/composio/exa_composio.py +11 -0
- lfx/components/composio/firecrawl_composio.py +11 -0
- lfx/components/composio/fireflies_composio.py +11 -0
- lfx/components/composio/gmail_composio.py +1 -1
- lfx/components/composio/googlebigquery_composio.py +11 -0
- lfx/components/composio/googlecalendar_composio.py +1 -1
- lfx/components/composio/googledocs_composio.py +1 -1
- lfx/components/composio/googlemeet_composio.py +1 -1
- lfx/components/composio/googlesheets_composio.py +1 -1
- lfx/components/composio/googletasks_composio.py +1 -1
- lfx/components/composio/heygen_composio.py +11 -0
- lfx/components/composio/mem0_composio.py +11 -0
- lfx/components/composio/peopledatalabs_composio.py +11 -0
- lfx/components/composio/perplexityai_composio.py +11 -0
- lfx/components/composio/serpapi_composio.py +11 -0
- lfx/components/composio/slack_composio.py +3 -574
- lfx/components/composio/slackbot_composio.py +1 -1
- lfx/components/composio/snowflake_composio.py +11 -0
- lfx/components/composio/tavily_composio.py +11 -0
- lfx/components/composio/youtube_composio.py +2 -2
- lfx/components/cuga/__init__.py +34 -0
- lfx/components/cuga/cuga_agent.py +730 -0
- lfx/components/data/__init__.py +78 -28
- lfx/components/data_source/__init__.py +58 -0
- lfx/components/{data → data_source}/api_request.py +26 -3
- lfx/components/{data → data_source}/csv_to_data.py +15 -10
- lfx/components/{data → data_source}/json_to_data.py +15 -8
- lfx/components/{data → data_source}/news_search.py +1 -1
- lfx/components/{data → data_source}/rss.py +1 -1
- lfx/components/{data → data_source}/sql_executor.py +1 -1
- lfx/components/{data → data_source}/url.py +1 -1
- lfx/components/{data → data_source}/web_search.py +1 -1
- lfx/components/datastax/astradb_cql.py +1 -1
- lfx/components/datastax/astradb_graph.py +1 -1
- lfx/components/datastax/astradb_tool.py +1 -1
- lfx/components/datastax/astradb_vectorstore.py +1 -1
- lfx/components/datastax/hcd.py +1 -1
- lfx/components/deactivated/json_document_builder.py +1 -1
- lfx/components/docling/__init__.py +0 -3
- lfx/components/docling/chunk_docling_document.py +3 -1
- lfx/components/docling/export_docling_document.py +3 -1
- lfx/components/elastic/elasticsearch.py +1 -1
- lfx/components/files_and_knowledge/__init__.py +47 -0
- lfx/components/{data → files_and_knowledge}/directory.py +1 -1
- lfx/components/{data → files_and_knowledge}/file.py +304 -24
- lfx/components/{knowledge_bases → files_and_knowledge}/retrieval.py +2 -2
- lfx/components/{data → files_and_knowledge}/save_file.py +218 -31
- lfx/components/flow_controls/__init__.py +58 -0
- lfx/components/{logic → flow_controls}/conditional_router.py +1 -1
- lfx/components/{logic → flow_controls}/loop.py +43 -9
- lfx/components/flow_controls/run_flow.py +108 -0
- lfx/components/glean/glean_search_api.py +1 -1
- lfx/components/groq/groq.py +35 -28
- lfx/components/helpers/__init__.py +102 -0
- lfx/components/ibm/watsonx.py +7 -1
- lfx/components/input_output/__init__.py +3 -1
- lfx/components/input_output/chat.py +4 -3
- lfx/components/input_output/chat_output.py +10 -4
- lfx/components/input_output/text.py +1 -1
- lfx/components/input_output/text_output.py +1 -1
- lfx/components/{data → input_output}/webhook.py +1 -1
- lfx/components/knowledge_bases/__init__.py +59 -4
- lfx/components/langchain_utilities/character.py +1 -1
- lfx/components/langchain_utilities/csv_agent.py +84 -16
- lfx/components/langchain_utilities/json_agent.py +67 -12
- lfx/components/langchain_utilities/language_recursive.py +1 -1
- lfx/components/llm_operations/__init__.py +46 -0
- lfx/components/{processing → llm_operations}/batch_run.py +17 -8
- lfx/components/{processing → llm_operations}/lambda_filter.py +1 -1
- lfx/components/{logic → llm_operations}/llm_conditional_router.py +1 -1
- lfx/components/{processing/llm_router.py → llm_operations/llm_selector.py} +3 -3
- lfx/components/{processing → llm_operations}/structured_output.py +1 -1
- lfx/components/logic/__init__.py +126 -0
- lfx/components/mem0/mem0_chat_memory.py +11 -0
- lfx/components/models/__init__.py +64 -9
- lfx/components/models_and_agents/__init__.py +49 -0
- lfx/components/{agents → models_and_agents}/agent.py +6 -4
- lfx/components/models_and_agents/embedding_model.py +353 -0
- lfx/components/models_and_agents/language_model.py +398 -0
- lfx/components/{agents → models_and_agents}/mcp_component.py +53 -44
- lfx/components/{helpers → models_and_agents}/memory.py +1 -1
- lfx/components/nvidia/system_assist.py +1 -1
- lfx/components/olivya/olivya.py +1 -1
- lfx/components/ollama/ollama.py +24 -5
- lfx/components/processing/__init__.py +9 -60
- lfx/components/processing/converter.py +1 -1
- lfx/components/processing/dataframe_operations.py +1 -1
- lfx/components/processing/parse_json_data.py +2 -2
- lfx/components/processing/parser.py +1 -1
- lfx/components/processing/split_text.py +1 -1
- lfx/components/qdrant/qdrant.py +1 -1
- lfx/components/redis/redis.py +1 -1
- lfx/components/twelvelabs/split_video.py +10 -0
- lfx/components/twelvelabs/video_file.py +12 -0
- lfx/components/utilities/__init__.py +43 -0
- lfx/components/{helpers → utilities}/calculator_core.py +1 -1
- lfx/components/{helpers → utilities}/current_date.py +1 -1
- lfx/components/{processing → utilities}/python_repl_core.py +1 -1
- lfx/components/vectorstores/local_db.py +9 -0
- lfx/components/youtube/youtube_transcripts.py +118 -30
- lfx/custom/custom_component/component.py +57 -1
- lfx/custom/custom_component/custom_component.py +68 -6
- lfx/custom/directory_reader/directory_reader.py +5 -2
- lfx/graph/edge/base.py +43 -20
- lfx/graph/state/model.py +15 -2
- lfx/graph/utils.py +6 -0
- lfx/graph/vertex/param_handler.py +10 -7
- lfx/helpers/__init__.py +12 -0
- lfx/helpers/flow.py +117 -0
- lfx/inputs/input_mixin.py +24 -1
- lfx/inputs/inputs.py +13 -1
- lfx/interface/components.py +161 -83
- lfx/log/logger.py +5 -3
- lfx/schema/image.py +2 -12
- lfx/services/database/__init__.py +5 -0
- lfx/services/database/service.py +25 -0
- lfx/services/deps.py +87 -22
- lfx/services/interfaces.py +5 -0
- lfx/services/manager.py +24 -10
- lfx/services/mcp_composer/service.py +1029 -162
- lfx/services/session.py +5 -0
- lfx/services/settings/auth.py +18 -11
- lfx/services/settings/base.py +56 -30
- lfx/services/settings/constants.py +8 -0
- lfx/services/storage/local.py +108 -46
- lfx/services/storage/service.py +171 -29
- lfx/template/field/base.py +3 -0
- lfx/utils/image.py +29 -11
- lfx/utils/ssrf_protection.py +384 -0
- lfx/utils/validate_cloud.py +26 -0
- {lfx_nightly-0.2.0.dev0.dist-info → lfx_nightly-0.2.0.dev41.dist-info}/METADATA +38 -22
- {lfx_nightly-0.2.0.dev0.dist-info → lfx_nightly-0.2.0.dev41.dist-info}/RECORD +189 -160
- {lfx_nightly-0.2.0.dev0.dist-info → lfx_nightly-0.2.0.dev41.dist-info}/WHEEL +1 -1
- lfx/components/agents/altk_agent.py +0 -366
- lfx/components/agents/cuga_agent.py +0 -1013
- lfx/components/docling/docling_remote_vlm.py +0 -284
- lfx/components/logic/run_flow.py +0 -71
- lfx/components/models/embedding_model.py +0 -195
- lfx/components/models/language_model.py +0 -144
- lfx/components/processing/dataframe_to_toolset.py +0 -259
- /lfx/components/{data → data_source}/mock_data.py +0 -0
- /lfx/components/{knowledge_bases → files_and_knowledge}/ingestion.py +0 -0
- /lfx/components/{logic → flow_controls}/data_conditional_router.py +0 -0
- /lfx/components/{logic → flow_controls}/flow_tool.py +0 -0
- /lfx/components/{logic → flow_controls}/listen.py +0 -0
- /lfx/components/{logic → flow_controls}/notify.py +0 -0
- /lfx/components/{logic → flow_controls}/pass_message.py +0 -0
- /lfx/components/{logic → flow_controls}/sub_flow.py +0 -0
- /lfx/components/{processing → models_and_agents}/prompt.py +0 -0
- /lfx/components/{helpers → processing}/create_list.py +0 -0
- /lfx/components/{helpers → processing}/output_parser.py +0 -0
- /lfx/components/{helpers → processing}/store_message.py +0 -0
- /lfx/components/{helpers → utilities}/id_generator.py +0 -0
- {lfx_nightly-0.2.0.dev0.dist-info → lfx_nightly-0.2.0.dev41.dist-info}/entry_points.txt +0 -0
lfx/base/data/utils.py
CHANGED
|
@@ -1,14 +1,21 @@
|
|
|
1
|
+
import contextlib
|
|
2
|
+
import tempfile
|
|
1
3
|
import unicodedata
|
|
2
4
|
from collections.abc import Callable
|
|
3
5
|
from concurrent import futures
|
|
6
|
+
from io import BytesIO
|
|
4
7
|
from pathlib import Path
|
|
5
8
|
|
|
6
9
|
import chardet
|
|
7
10
|
import orjson
|
|
8
11
|
import yaml
|
|
9
12
|
from defusedxml import ElementTree
|
|
13
|
+
from pypdf import PdfReader
|
|
10
14
|
|
|
15
|
+
from lfx.base.data.storage_utils import read_file_bytes
|
|
11
16
|
from lfx.schema.data import Data
|
|
17
|
+
from lfx.services.deps import get_settings_service
|
|
18
|
+
from lfx.utils.async_helpers import run_until_complete
|
|
12
19
|
|
|
13
20
|
# Types of files that can be read simply by file.read()
|
|
14
21
|
# and have 100% to be completely readable
|
|
@@ -36,6 +43,34 @@ TEXT_FILE_TYPES = [
|
|
|
36
43
|
IMG_FILE_TYPES = ["jpg", "jpeg", "png", "bmp", "image"]
|
|
37
44
|
|
|
38
45
|
|
|
46
|
+
def parse_structured_text(text: str, file_path: str) -> str | dict | list:
|
|
47
|
+
"""Parse structured text formats (JSON, YAML, XML) and normalize text.
|
|
48
|
+
|
|
49
|
+
Args:
|
|
50
|
+
text: The text content to parse
|
|
51
|
+
file_path: The file path (used to determine format)
|
|
52
|
+
|
|
53
|
+
Returns:
|
|
54
|
+
Parsed content (normalized JSON string for JSON, the loaded object for YAML, serialized str for XML; any other text is returned unchanged)
|
|
55
|
+
"""
|
|
56
|
+
if file_path.endswith(".json"):
|
|
57
|
+
loaded_json = orjson.loads(text)
|
|
58
|
+
if isinstance(loaded_json, dict):
|
|
59
|
+
loaded_json = {k: normalize_text(v) if isinstance(v, str) else v for k, v in loaded_json.items()}
|
|
60
|
+
elif isinstance(loaded_json, list):
|
|
61
|
+
loaded_json = [normalize_text(item) if isinstance(item, str) else item for item in loaded_json]
|
|
62
|
+
return orjson.dumps(loaded_json).decode("utf-8")
|
|
63
|
+
|
|
64
|
+
if file_path.endswith((".yaml", ".yml")):
|
|
65
|
+
return yaml.safe_load(text)
|
|
66
|
+
|
|
67
|
+
if file_path.endswith(".xml"):
|
|
68
|
+
xml_element = ElementTree.fromstring(text)
|
|
69
|
+
return ElementTree.tostring(xml_element, encoding="unicode")
|
|
70
|
+
|
|
71
|
+
return text
|
|
72
|
+
|
|
73
|
+
|
|
39
74
|
def normalize_text(text):
|
|
40
75
|
return unicodedata.normalize("NFKD", text)
|
|
41
76
|
|
|
@@ -109,6 +144,14 @@ def partition_file_to_data(file_path: str, *, silent_errors: bool) -> Data | Non
|
|
|
109
144
|
|
|
110
145
|
|
|
111
146
|
def read_text_file(file_path: str) -> str:
|
|
147
|
+
"""Read a text file with automatic encoding detection.
|
|
148
|
+
|
|
149
|
+
Args:
|
|
150
|
+
file_path: Path to the file (local path only, not storage service path)
|
|
151
|
+
|
|
152
|
+
Returns:
|
|
153
|
+
str: The file content as text
|
|
154
|
+
"""
|
|
112
155
|
file_path_ = Path(file_path)
|
|
113
156
|
raw_data = file_path_.read_bytes()
|
|
114
157
|
result = chardet.detect(raw_data)
|
|
@@ -120,13 +163,90 @@ def read_text_file(file_path: str) -> str:
|
|
|
120
163
|
return file_path_.read_text(encoding=encoding)
|
|
121
164
|
|
|
122
165
|
|
|
166
|
+
async def read_text_file_async(file_path: str) -> str:
|
|
167
|
+
"""Read a text file with automatic encoding detection (async, storage-aware).
|
|
168
|
+
|
|
169
|
+
Args:
|
|
170
|
+
file_path: Path to the file (S3 key format "flow_id/filename" or local path)
|
|
171
|
+
|
|
172
|
+
Returns:
|
|
173
|
+
str: The file content as text
|
|
174
|
+
"""
|
|
175
|
+
from .storage_utils import read_file_bytes
|
|
176
|
+
|
|
177
|
+
# Use storage-aware read to get bytes
|
|
178
|
+
raw_data = await read_file_bytes(file_path)
|
|
179
|
+
|
|
180
|
+
# Auto-detect encoding
|
|
181
|
+
result = chardet.detect(raw_data)
|
|
182
|
+
encoding = result.get("encoding")
|
|
183
|
+
|
|
184
|
+
# If encoding detection fails (e.g., binary file), default to utf-8
|
|
185
|
+
if not encoding or encoding in {"Windows-1252", "Windows-1254", "MacRoman"}:
|
|
186
|
+
encoding = "utf-8"
|
|
187
|
+
|
|
188
|
+
return raw_data.decode(encoding, errors="replace")
|
|
189
|
+
|
|
190
|
+
|
|
123
191
|
def read_docx_file(file_path: str) -> str:
|
|
192
|
+
"""Read a DOCX file and extract text.
|
|
193
|
+
|
|
194
|
+
ote: python-docx requires a file path, so this only works with local files.
|
|
195
|
+
For storage service files, use read_docx_file_async which downloads to temp.
|
|
196
|
+
|
|
197
|
+
Args:
|
|
198
|
+
file_path: Path to the DOCX file (local path only)
|
|
199
|
+
|
|
200
|
+
Returns:
|
|
201
|
+
str: Extracted text from the document
|
|
202
|
+
"""
|
|
124
203
|
from docx import Document
|
|
125
204
|
|
|
126
205
|
doc = Document(file_path)
|
|
127
206
|
return "\n\n".join([p.text for p in doc.paragraphs])
|
|
128
207
|
|
|
129
208
|
|
|
209
|
+
async def read_docx_file_async(file_path: str) -> str:
|
|
210
|
+
"""Read a DOCX file and extract text (async, storage-aware).
|
|
211
|
+
|
|
212
|
+
For S3 storage, downloads to temp file (python-docx requires file path).
|
|
213
|
+
For local storage, reads directly.
|
|
214
|
+
|
|
215
|
+
Args:
|
|
216
|
+
file_path: Path to the DOCX file (S3 key format "flow_id/filename" or local path)
|
|
217
|
+
|
|
218
|
+
Returns:
|
|
219
|
+
str: Extracted text from the document
|
|
220
|
+
"""
|
|
221
|
+
from docx import Document
|
|
222
|
+
|
|
223
|
+
from .storage_utils import read_file_bytes
|
|
224
|
+
|
|
225
|
+
settings = get_settings_service().settings
|
|
226
|
+
|
|
227
|
+
if settings.storage_type == "local":
|
|
228
|
+
# Local storage - read directly
|
|
229
|
+
doc = Document(file_path)
|
|
230
|
+
return "\n\n".join([p.text for p in doc.paragraphs])
|
|
231
|
+
|
|
232
|
+
# S3 storage - need temp file for python-docx (doesn't support BytesIO)
|
|
233
|
+
content = await read_file_bytes(file_path)
|
|
234
|
+
|
|
235
|
+
# Create temp file with .docx extension
|
|
236
|
+
# Extract filename from path for suffix
|
|
237
|
+
suffix = Path(file_path.split("/")[-1]).suffix
|
|
238
|
+
with tempfile.NamedTemporaryFile(mode="wb", suffix=suffix, delete=False) as tmp_file:
|
|
239
|
+
tmp_file.write(content)
|
|
240
|
+
temp_path = tmp_file.name
|
|
241
|
+
|
|
242
|
+
try:
|
|
243
|
+
doc = Document(temp_path)
|
|
244
|
+
return "\n\n".join([p.text for p in doc.paragraphs])
|
|
245
|
+
finally:
|
|
246
|
+
with contextlib.suppress(Exception):
|
|
247
|
+
Path(temp_path).unlink()
|
|
248
|
+
|
|
249
|
+
|
|
130
250
|
def parse_pdf_to_text(file_path: str) -> str:
|
|
131
251
|
from pypdf import PdfReader
|
|
132
252
|
|
|
@@ -134,7 +254,35 @@ def parse_pdf_to_text(file_path: str) -> str:
|
|
|
134
254
|
return "\n\n".join([page.extract_text() for page in reader.pages])
|
|
135
255
|
|
|
136
256
|
|
|
257
|
+
async def parse_pdf_to_text_async(file_path: str) -> str:
|
|
258
|
+
"""Parse a PDF file to extract text (async, storage-aware).
|
|
259
|
+
|
|
260
|
+
Uses storage-aware file reading to support both local and S3 storage.
|
|
261
|
+
|
|
262
|
+
Args:
|
|
263
|
+
file_path: Path to the PDF file (S3 key format "flow_id/filename" or local path)
|
|
264
|
+
|
|
265
|
+
Returns:
|
|
266
|
+
str: Extracted text from all pages
|
|
267
|
+
"""
|
|
268
|
+
content = await read_file_bytes(file_path)
|
|
269
|
+
with BytesIO(content) as f, PdfReader(f) as reader:
|
|
270
|
+
return "\n\n".join([page.extract_text() for page in reader.pages])
|
|
271
|
+
|
|
272
|
+
|
|
137
273
|
def parse_text_file_to_data(file_path: str, *, silent_errors: bool) -> Data | None:
|
|
274
|
+
"""Parse a text file to Data (sync version).
|
|
275
|
+
|
|
276
|
+
For S3 storage, this will use async operations to fetch the file.
|
|
277
|
+
For local storage, reads directly from filesystem.
|
|
278
|
+
"""
|
|
279
|
+
settings = get_settings_service().settings
|
|
280
|
+
|
|
281
|
+
# If using S3 storage, we need to use async operations
|
|
282
|
+
if settings.storage_type == "s3":
|
|
283
|
+
# Run the async version safely (handles existing event loops)
|
|
284
|
+
return run_until_complete(parse_text_file_to_data_async(file_path, silent_errors=silent_errors))
|
|
285
|
+
|
|
138
286
|
try:
|
|
139
287
|
if file_path.endswith(".pdf"):
|
|
140
288
|
text = parse_pdf_to_text(file_path)
|
|
@@ -143,20 +291,7 @@ def parse_text_file_to_data(file_path: str, *, silent_errors: bool) -> Data | No
|
|
|
143
291
|
else:
|
|
144
292
|
text = read_text_file(file_path)
|
|
145
293
|
|
|
146
|
-
|
|
147
|
-
if file_path.endswith(".json"):
|
|
148
|
-
loaded_json = orjson.loads(text)
|
|
149
|
-
if isinstance(loaded_json, dict):
|
|
150
|
-
loaded_json = {k: normalize_text(v) if isinstance(v, str) else v for k, v in loaded_json.items()}
|
|
151
|
-
elif isinstance(loaded_json, list):
|
|
152
|
-
loaded_json = [normalize_text(item) if isinstance(item, str) else item for item in loaded_json]
|
|
153
|
-
text = orjson.dumps(loaded_json).decode("utf-8")
|
|
154
|
-
|
|
155
|
-
elif file_path.endswith((".yaml", ".yml")):
|
|
156
|
-
text = yaml.safe_load(text)
|
|
157
|
-
elif file_path.endswith(".xml"):
|
|
158
|
-
xml_element = ElementTree.fromstring(text)
|
|
159
|
-
text = ElementTree.tostring(xml_element, encoding="unicode")
|
|
294
|
+
text = parse_structured_text(text, file_path)
|
|
160
295
|
except Exception as e:
|
|
161
296
|
if not silent_errors:
|
|
162
297
|
msg = f"Error loading file {file_path}: {e}"
|
|
@@ -166,6 +301,35 @@ def parse_text_file_to_data(file_path: str, *, silent_errors: bool) -> Data | No
|
|
|
166
301
|
return Data(data={"file_path": file_path, "text": text})
|
|
167
302
|
|
|
168
303
|
|
|
304
|
+
async def parse_text_file_to_data_async(file_path: str, *, silent_errors: bool) -> Data | None:
|
|
305
|
+
"""Parse a text file to Data (async version, supports storage service).
|
|
306
|
+
|
|
307
|
+
This version properly handles storage service files:
|
|
308
|
+
- For text/JSON/YAML/XML: reads bytes directly (no temp file)
|
|
309
|
+
- For PDF: reads bytes directly via BytesIO (no temp file)
|
|
310
|
+
- For DOCX: downloads to temp file (python-docx requires file path)
|
|
311
|
+
"""
|
|
312
|
+
try:
|
|
313
|
+
if file_path.endswith(".pdf"):
|
|
314
|
+
text = await parse_pdf_to_text_async(file_path)
|
|
315
|
+
elif file_path.endswith(".docx"):
|
|
316
|
+
text = await read_docx_file_async(file_path)
|
|
317
|
+
else:
|
|
318
|
+
# Text files - read directly, no temp file needed
|
|
319
|
+
text = await read_text_file_async(file_path)
|
|
320
|
+
|
|
321
|
+
# Parse structured formats (JSON, YAML, XML)
|
|
322
|
+
text = parse_structured_text(text, file_path)
|
|
323
|
+
|
|
324
|
+
return Data(data={"file_path": file_path, "text": text})
|
|
325
|
+
|
|
326
|
+
except Exception as e:
|
|
327
|
+
if not silent_errors:
|
|
328
|
+
msg = f"Error loading file {file_path}: {e}"
|
|
329
|
+
raise ValueError(msg) from e
|
|
330
|
+
return None
|
|
331
|
+
|
|
332
|
+
|
|
169
333
|
# ! Removing unstructured dependency until
|
|
170
334
|
# ! 3.12 is supported
|
|
171
335
|
# def get_elements(
|
lfx/base/mcp/util.py
CHANGED
|
@@ -23,6 +23,7 @@ from pydantic import BaseModel
|
|
|
23
23
|
from lfx.log.logger import logger
|
|
24
24
|
from lfx.schema.json_schema import create_input_schema_from_json_schema
|
|
25
25
|
from lfx.services.deps import get_settings_service
|
|
26
|
+
from lfx.utils.async_helpers import run_until_complete
|
|
26
27
|
|
|
27
28
|
HTTP_ERROR_STATUS_CODE = httpx_codes.BAD_REQUEST # HTTP status code for client errors
|
|
28
29
|
|
|
@@ -351,8 +352,7 @@ def create_tool_func(tool_name: str, arg_schema: type[BaseModel], client) -> Cal
|
|
|
351
352
|
_handle_tool_validation_error(e, tool_name, provided_args, arg_schema)
|
|
352
353
|
|
|
353
354
|
try:
|
|
354
|
-
|
|
355
|
-
return loop.run_until_complete(client.run_tool(tool_name, arguments=validated.model_dump()))
|
|
355
|
+
return run_until_complete(client.run_tool(tool_name, arguments=validated.model_dump()))
|
|
356
356
|
except Exception as e:
|
|
357
357
|
logger.error(f"Tool '{tool_name}' execution failed: {e}")
|
|
358
358
|
# Re-raise with more context
|
|
@@ -2,32 +2,41 @@ from .model_metadata import create_model_metadata
|
|
|
2
2
|
|
|
3
3
|
ANTHROPIC_MODELS_DETAILED = [
|
|
4
4
|
# Tool calling supported models
|
|
5
|
+
create_model_metadata(provider="Anthropic", name="claude-opus-4-5-20251101", icon="Anthropic", tool_calling=True),
|
|
6
|
+
create_model_metadata(provider="Anthropic", name="claude-haiku-4-5-20251001", icon="Anthropic", tool_calling=True),
|
|
5
7
|
create_model_metadata(provider="Anthropic", name="claude-sonnet-4-5-20250929", icon="Anthropic", tool_calling=True),
|
|
6
8
|
create_model_metadata(provider="Anthropic", name="claude-opus-4-1-20250805", icon="Anthropic", tool_calling=True),
|
|
7
9
|
create_model_metadata(provider="Anthropic", name="claude-opus-4-20250514", icon="Anthropic", tool_calling=True),
|
|
8
10
|
create_model_metadata(provider="Anthropic", name="claude-sonnet-4-20250514", icon="Anthropic", tool_calling=True),
|
|
9
|
-
create_model_metadata(provider="Anthropic", name="claude-3-
|
|
10
|
-
create_model_metadata(provider="Anthropic", name="claude-3-
|
|
11
|
-
|
|
12
|
-
create_model_metadata(
|
|
11
|
+
create_model_metadata(provider="Anthropic", name="claude-3-5-haiku-20241022", icon="Anthropic", tool_calling=True),
|
|
12
|
+
create_model_metadata(provider="Anthropic", name="claude-3-haiku-20240307", icon="Anthropic", tool_calling=True),
|
|
13
|
+
# Deprecated models
|
|
14
|
+
create_model_metadata(
|
|
15
|
+
provider="Anthropic", name="claude-3-7-sonnet-latest", icon="Anthropic", tool_calling=True, deprecated=True
|
|
16
|
+
),
|
|
17
|
+
create_model_metadata(
|
|
18
|
+
provider="Anthropic", name="claude-3-5-sonnet-latest", icon="Anthropic", tool_calling=True, deprecated=True
|
|
19
|
+
),
|
|
20
|
+
create_model_metadata(
|
|
21
|
+
provider="Anthropic", name="claude-3-5-haiku-latest", icon="Anthropic", tool_calling=True, deprecated=True
|
|
22
|
+
),
|
|
23
|
+
create_model_metadata(
|
|
24
|
+
provider="Anthropic", name="claude-3-opus-latest", icon="Anthropic", tool_calling=True, deprecated=True
|
|
25
|
+
),
|
|
13
26
|
create_model_metadata(
|
|
14
27
|
provider="Anthropic", name="claude-3-sonnet-20240229", icon="Anthropic", tool_calling=True, deprecated=True
|
|
15
28
|
),
|
|
16
|
-
# Tool calling unsupported models
|
|
17
|
-
create_model_metadata(provider="Anthropic", name="claude-2.1", icon="Anthropic", tool_calling=False),
|
|
18
|
-
create_model_metadata(provider="Anthropic", name="claude-2.0", icon="Anthropic", tool_calling=False),
|
|
19
|
-
# Deprecated models
|
|
20
29
|
create_model_metadata(
|
|
21
|
-
provider="Anthropic", name="claude-
|
|
30
|
+
provider="Anthropic", name="claude-2.1", icon="Anthropic", tool_calling=False, deprecated=True
|
|
22
31
|
),
|
|
23
32
|
create_model_metadata(
|
|
24
|
-
provider="Anthropic", name="claude-
|
|
33
|
+
provider="Anthropic", name="claude-2.0", icon="Anthropic", tool_calling=False, deprecated=True
|
|
25
34
|
),
|
|
26
35
|
create_model_metadata(
|
|
27
|
-
provider="Anthropic", name="claude-3-5-
|
|
36
|
+
provider="Anthropic", name="claude-3-5-sonnet-20240620", icon="Anthropic", tool_calling=True, deprecated=True
|
|
28
37
|
),
|
|
29
38
|
create_model_metadata(
|
|
30
|
-
provider="Anthropic", name="claude-3-
|
|
39
|
+
provider="Anthropic", name="claude-3-5-sonnet-20241022", icon="Anthropic", tool_calling=True, deprecated=True
|
|
31
40
|
),
|
|
32
41
|
]
|
|
33
42
|
|
|
@@ -1,87 +1,91 @@
|
|
|
1
1
|
from .model_metadata import create_model_metadata
|
|
2
2
|
|
|
3
|
-
# Unified model metadata
|
|
3
|
+
# Unified model metadata
|
|
4
|
+
#
|
|
5
|
+
# NOTE: This file serves as a FALLBACK when the dynamic model discovery system
|
|
6
|
+
# (groq_model_discovery.py) cannot fetch fresh data from the Groq API.
|
|
7
|
+
#
|
|
8
|
+
# The dynamic system is the PRIMARY source and will:
|
|
9
|
+
# - Fetch available models directly from Groq API
|
|
10
|
+
# - Test each model for tool calling support automatically
|
|
11
|
+
# - Cache results for 24 hours
|
|
12
|
+
# - Always provide up-to-date model lists
|
|
13
|
+
#
|
|
14
|
+
# This fallback list should contain:
|
|
15
|
+
# - Minimal set of stable production models
|
|
16
|
+
# - Deprecated models for backwards compatibility
|
|
17
|
+
# - Non-LLM models (audio, TTS) marked as not_supported
|
|
18
|
+
#
|
|
19
|
+
# Last manually updated: 2025-01-06
|
|
20
|
+
#
|
|
4
21
|
GROQ_MODELS_DETAILED = [
|
|
5
|
-
#
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
),
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
create_model_metadata( #
|
|
13
|
-
provider="Groq", name="
|
|
14
|
-
),
|
|
15
|
-
create_model_metadata( # Meta
|
|
16
|
-
provider="Groq", name="llama-guard-3-8b", icon="Groq"
|
|
22
|
+
# ===== FALLBACK PRODUCTION MODELS =====
|
|
23
|
+
# These are stable models that are very unlikely to be removed
|
|
24
|
+
create_model_metadata(provider="Groq", name="llama-3.1-8b-instant", icon="Groq", tool_calling=True),
|
|
25
|
+
create_model_metadata(provider="Groq", name="llama-3.3-70b-versatile", icon="Groq", tool_calling=True),
|
|
26
|
+
# ===== DEPRECATED MODELS =====
|
|
27
|
+
# Keep these for backwards compatibility - users may have flows using them
|
|
28
|
+
# These will appear in the list but show as deprecated in the UI
|
|
29
|
+
create_model_metadata( # Google - Removed
|
|
30
|
+
provider="Groq", name="gemma2-9b-it", icon="Groq", deprecated=True
|
|
17
31
|
),
|
|
18
|
-
create_model_metadata( #
|
|
19
|
-
provider="Groq", name="
|
|
20
|
-
),
|
|
21
|
-
create_model_metadata( # Meta
|
|
22
|
-
provider="Groq", name="llama3-8b-8192", icon="Groq"
|
|
23
|
-
),
|
|
24
|
-
# Preview Models - For evaluation purposes only
|
|
25
|
-
create_model_metadata( # Meta
|
|
26
|
-
provider="Groq", name="meta-llama/llama-4-scout-17b-16e-instruct", icon="Groq", tool_calling=True, preview=True
|
|
32
|
+
create_model_metadata( # Google
|
|
33
|
+
provider="Groq", name="gemma-7b-it", icon="Groq", deprecated=True
|
|
27
34
|
),
|
|
28
|
-
create_model_metadata( # Meta
|
|
29
|
-
provider="Groq",
|
|
30
|
-
name="meta-llama/llama-4-maverick-17b-128e-instruct",
|
|
31
|
-
icon="Groq",
|
|
32
|
-
tool_calling=True,
|
|
33
|
-
preview=True,
|
|
35
|
+
create_model_metadata( # Meta - Removed
|
|
36
|
+
provider="Groq", name="llama3-70b-8192", icon="Groq", deprecated=True
|
|
34
37
|
),
|
|
35
|
-
create_model_metadata( #
|
|
36
|
-
provider="Groq", name="
|
|
38
|
+
create_model_metadata( # Meta - Removed
|
|
39
|
+
provider="Groq", name="llama3-8b-8192", icon="Groq", deprecated=True
|
|
37
40
|
),
|
|
38
|
-
create_model_metadata( #
|
|
39
|
-
provider="Groq", name="
|
|
41
|
+
create_model_metadata( # Meta - Removed, replaced by llama-guard-4-12b
|
|
42
|
+
provider="Groq", name="llama-guard-3-8b", icon="Groq", deprecated=True
|
|
40
43
|
),
|
|
41
|
-
create_model_metadata( #
|
|
42
|
-
provider="Groq", name="
|
|
44
|
+
create_model_metadata( # Meta - Removed
|
|
45
|
+
provider="Groq", name="llama-3.2-1b-preview", icon="Groq", deprecated=True
|
|
43
46
|
),
|
|
44
|
-
create_model_metadata( #
|
|
45
|
-
provider="Groq", name="
|
|
47
|
+
create_model_metadata( # Meta - Removed
|
|
48
|
+
provider="Groq", name="llama-3.2-3b-preview", icon="Groq", deprecated=True
|
|
46
49
|
),
|
|
47
|
-
create_model_metadata( #
|
|
48
|
-
provider="Groq", name="
|
|
50
|
+
create_model_metadata( # Meta - Removed
|
|
51
|
+
provider="Groq", name="llama-3.2-11b-vision-preview", icon="Groq", deprecated=True
|
|
49
52
|
),
|
|
50
|
-
create_model_metadata( # Meta
|
|
51
|
-
provider="Groq", name="llama-3.
|
|
53
|
+
create_model_metadata( # Meta - Removed
|
|
54
|
+
provider="Groq", name="llama-3.2-90b-vision-preview", icon="Groq", deprecated=True
|
|
52
55
|
),
|
|
53
|
-
create_model_metadata( # Meta
|
|
54
|
-
provider="Groq", name="llama-3.
|
|
56
|
+
create_model_metadata( # Meta - Removed
|
|
57
|
+
provider="Groq", name="llama-3.3-70b-specdec", icon="Groq", deprecated=True
|
|
55
58
|
),
|
|
56
|
-
create_model_metadata( #
|
|
57
|
-
provider="Groq", name="
|
|
59
|
+
create_model_metadata( # Alibaba - Removed, replaced by qwen/qwen3-32b
|
|
60
|
+
provider="Groq", name="qwen-qwq-32b", icon="Groq", deprecated=True
|
|
58
61
|
),
|
|
59
|
-
create_model_metadata( #
|
|
60
|
-
provider="Groq", name="
|
|
62
|
+
create_model_metadata( # Alibaba - Removed
|
|
63
|
+
provider="Groq", name="qwen-2.5-coder-32b", icon="Groq", deprecated=True
|
|
61
64
|
),
|
|
62
|
-
create_model_metadata( #
|
|
63
|
-
provider="Groq", name="
|
|
65
|
+
create_model_metadata( # Alibaba - Removed
|
|
66
|
+
provider="Groq", name="qwen-2.5-32b", icon="Groq", deprecated=True
|
|
64
67
|
),
|
|
65
|
-
create_model_metadata( #
|
|
66
|
-
provider="Groq", name="
|
|
68
|
+
create_model_metadata( # DeepSeek - Removed
|
|
69
|
+
provider="Groq", name="deepseek-r1-distill-qwen-32b", icon="Groq", deprecated=True
|
|
67
70
|
),
|
|
68
|
-
#
|
|
69
|
-
|
|
70
|
-
provider="Groq", name="gemma-7b-it", icon="Groq", tool_calling=True, deprecated=True
|
|
71
|
+
create_model_metadata( # DeepSeek - Removed
|
|
72
|
+
provider="Groq", name="deepseek-r1-distill-llama-70b", icon="Groq", deprecated=True
|
|
71
73
|
),
|
|
72
74
|
create_model_metadata( # Groq
|
|
73
|
-
provider="Groq", name="llama3-groq-70b-8192-tool-use-preview", icon="Groq",
|
|
75
|
+
provider="Groq", name="llama3-groq-70b-8192-tool-use-preview", icon="Groq", deprecated=True
|
|
74
76
|
),
|
|
75
77
|
create_model_metadata( # Groq
|
|
76
|
-
provider="Groq", name="llama3-groq-8b-8192-tool-use-preview", icon="Groq",
|
|
78
|
+
provider="Groq", name="llama3-groq-8b-8192-tool-use-preview", icon="Groq", deprecated=True
|
|
77
79
|
),
|
|
78
80
|
create_model_metadata( # Meta
|
|
79
|
-
provider="Groq", name="llama-3.1-70b-versatile", icon="Groq",
|
|
81
|
+
provider="Groq", name="llama-3.1-70b-versatile", icon="Groq", deprecated=True
|
|
80
82
|
),
|
|
81
83
|
create_model_metadata( # Mistral
|
|
82
|
-
provider="Groq", name="mixtral-8x7b-32768", icon="Groq",
|
|
84
|
+
provider="Groq", name="mixtral-8x7b-32768", icon="Groq", deprecated=True
|
|
83
85
|
),
|
|
84
|
-
#
|
|
86
|
+
# ===== UNSUPPORTED MODELS =====
|
|
87
|
+
# Audio/TTS/Guard models that should not appear in LLM model lists
|
|
88
|
+
# The dynamic system automatically filters these out
|
|
85
89
|
create_model_metadata( # Mistral
|
|
86
90
|
provider="Groq", name="mistral-saba-24b", icon="Groq", not_supported=True
|
|
87
91
|
),
|
|
@@ -100,6 +104,18 @@ GROQ_MODELS_DETAILED = [
|
|
|
100
104
|
create_model_metadata( # Hugging Face
|
|
101
105
|
provider="Groq", name="distil-whisper-large-v3-en", icon="Groq", not_supported=True
|
|
102
106
|
),
|
|
107
|
+
create_model_metadata( # Meta
|
|
108
|
+
provider="Groq", name="meta-llama/llama-guard-4-12b", icon="Groq", not_supported=True
|
|
109
|
+
),
|
|
110
|
+
create_model_metadata( # Meta
|
|
111
|
+
provider="Groq", name="meta-llama/llama-prompt-guard-2-86m", icon="Groq", not_supported=True
|
|
112
|
+
),
|
|
113
|
+
create_model_metadata( # Meta
|
|
114
|
+
provider="Groq", name="meta-llama/llama-prompt-guard-2-22m", icon="Groq", not_supported=True
|
|
115
|
+
),
|
|
116
|
+
create_model_metadata( # OpenAI
|
|
117
|
+
provider="Groq", name="openai/gpt-oss-safeguard-20b", icon="Groq", not_supported=True
|
|
118
|
+
),
|
|
103
119
|
]
|
|
104
120
|
|
|
105
121
|
# Generate backwards-compatible lists from the metadata
|