lfx-nightly 0.2.0.dev0__py3-none-any.whl → 0.2.0.dev26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lfx/_assets/component_index.json +1 -1
- lfx/base/agents/agent.py +13 -1
- lfx/base/agents/altk_base_agent.py +380 -0
- lfx/base/agents/altk_tool_wrappers.py +565 -0
- lfx/base/agents/events.py +2 -1
- lfx/base/composio/composio_base.py +159 -224
- lfx/base/data/base_file.py +88 -21
- lfx/base/data/storage_utils.py +192 -0
- lfx/base/data/utils.py +178 -14
- lfx/base/embeddings/embeddings_class.py +113 -0
- lfx/base/models/groq_constants.py +74 -58
- lfx/base/models/groq_model_discovery.py +265 -0
- lfx/base/models/model.py +1 -1
- lfx/base/models/model_utils.py +100 -0
- lfx/base/models/openai_constants.py +7 -0
- lfx/base/models/watsonx_constants.py +32 -8
- lfx/base/tools/run_flow.py +601 -129
- lfx/cli/commands.py +6 -3
- lfx/cli/common.py +2 -2
- lfx/cli/run.py +1 -1
- lfx/cli/script_loader.py +53 -11
- lfx/components/Notion/create_page.py +1 -1
- lfx/components/Notion/list_database_properties.py +1 -1
- lfx/components/Notion/list_pages.py +1 -1
- lfx/components/Notion/list_users.py +1 -1
- lfx/components/Notion/page_content_viewer.py +1 -1
- lfx/components/Notion/search.py +1 -1
- lfx/components/Notion/update_page_property.py +1 -1
- lfx/components/__init__.py +19 -5
- lfx/components/{agents → altk}/__init__.py +5 -9
- lfx/components/altk/altk_agent.py +193 -0
- lfx/components/apify/apify_actor.py +1 -1
- lfx/components/composio/__init__.py +70 -18
- lfx/components/composio/apollo_composio.py +11 -0
- lfx/components/composio/bitbucket_composio.py +11 -0
- lfx/components/composio/canva_composio.py +11 -0
- lfx/components/composio/coda_composio.py +11 -0
- lfx/components/composio/composio_api.py +10 -0
- lfx/components/composio/discord_composio.py +1 -1
- lfx/components/composio/elevenlabs_composio.py +11 -0
- lfx/components/composio/exa_composio.py +11 -0
- lfx/components/composio/firecrawl_composio.py +11 -0
- lfx/components/composio/fireflies_composio.py +11 -0
- lfx/components/composio/gmail_composio.py +1 -1
- lfx/components/composio/googlebigquery_composio.py +11 -0
- lfx/components/composio/googlecalendar_composio.py +1 -1
- lfx/components/composio/googledocs_composio.py +1 -1
- lfx/components/composio/googlemeet_composio.py +1 -1
- lfx/components/composio/googlesheets_composio.py +1 -1
- lfx/components/composio/googletasks_composio.py +1 -1
- lfx/components/composio/heygen_composio.py +11 -0
- lfx/components/composio/mem0_composio.py +11 -0
- lfx/components/composio/peopledatalabs_composio.py +11 -0
- lfx/components/composio/perplexityai_composio.py +11 -0
- lfx/components/composio/serpapi_composio.py +11 -0
- lfx/components/composio/slack_composio.py +3 -574
- lfx/components/composio/slackbot_composio.py +1 -1
- lfx/components/composio/snowflake_composio.py +11 -0
- lfx/components/composio/tavily_composio.py +11 -0
- lfx/components/composio/youtube_composio.py +2 -2
- lfx/components/cuga/__init__.py +34 -0
- lfx/components/cuga/cuga_agent.py +730 -0
- lfx/components/data/__init__.py +78 -28
- lfx/components/data_source/__init__.py +58 -0
- lfx/components/{data → data_source}/api_request.py +26 -3
- lfx/components/{data → data_source}/csv_to_data.py +15 -10
- lfx/components/{data → data_source}/json_to_data.py +15 -8
- lfx/components/{data → data_source}/news_search.py +1 -1
- lfx/components/{data → data_source}/rss.py +1 -1
- lfx/components/{data → data_source}/sql_executor.py +1 -1
- lfx/components/{data → data_source}/url.py +1 -1
- lfx/components/{data → data_source}/web_search.py +1 -1
- lfx/components/datastax/astradb_cql.py +1 -1
- lfx/components/datastax/astradb_graph.py +1 -1
- lfx/components/datastax/astradb_tool.py +1 -1
- lfx/components/datastax/astradb_vectorstore.py +1 -1
- lfx/components/datastax/hcd.py +1 -1
- lfx/components/deactivated/json_document_builder.py +1 -1
- lfx/components/docling/__init__.py +0 -3
- lfx/components/elastic/elasticsearch.py +1 -1
- lfx/components/elastic/opensearch_multimodal.py +1575 -0
- lfx/components/files_and_knowledge/__init__.py +47 -0
- lfx/components/{data → files_and_knowledge}/directory.py +1 -1
- lfx/components/{data → files_and_knowledge}/file.py +246 -18
- lfx/components/{knowledge_bases → files_and_knowledge}/retrieval.py +2 -2
- lfx/components/{data → files_and_knowledge}/save_file.py +142 -22
- lfx/components/flow_controls/__init__.py +58 -0
- lfx/components/{logic → flow_controls}/conditional_router.py +1 -1
- lfx/components/{logic → flow_controls}/loop.py +43 -9
- lfx/components/flow_controls/run_flow.py +108 -0
- lfx/components/glean/glean_search_api.py +1 -1
- lfx/components/groq/groq.py +35 -28
- lfx/components/helpers/__init__.py +102 -0
- lfx/components/input_output/__init__.py +3 -1
- lfx/components/input_output/chat.py +4 -3
- lfx/components/input_output/chat_output.py +4 -4
- lfx/components/input_output/text.py +1 -1
- lfx/components/input_output/text_output.py +1 -1
- lfx/components/{data → input_output}/webhook.py +1 -1
- lfx/components/knowledge_bases/__init__.py +59 -4
- lfx/components/langchain_utilities/character.py +1 -1
- lfx/components/langchain_utilities/csv_agent.py +84 -16
- lfx/components/langchain_utilities/json_agent.py +67 -12
- lfx/components/langchain_utilities/language_recursive.py +1 -1
- lfx/components/llm_operations/__init__.py +46 -0
- lfx/components/{processing → llm_operations}/batch_run.py +1 -1
- lfx/components/{processing → llm_operations}/lambda_filter.py +1 -1
- lfx/components/{logic → llm_operations}/llm_conditional_router.py +1 -1
- lfx/components/{processing/llm_router.py → llm_operations/llm_selector.py} +3 -3
- lfx/components/{processing → llm_operations}/structured_output.py +1 -1
- lfx/components/logic/__init__.py +126 -0
- lfx/components/mem0/mem0_chat_memory.py +11 -0
- lfx/components/models/__init__.py +64 -9
- lfx/components/models_and_agents/__init__.py +49 -0
- lfx/components/{agents → models_and_agents}/agent.py +2 -2
- lfx/components/models_and_agents/embedding_model.py +423 -0
- lfx/components/models_and_agents/language_model.py +398 -0
- lfx/components/{agents → models_and_agents}/mcp_component.py +53 -44
- lfx/components/{helpers → models_and_agents}/memory.py +1 -1
- lfx/components/nvidia/system_assist.py +1 -1
- lfx/components/olivya/olivya.py +1 -1
- lfx/components/ollama/ollama.py +17 -3
- lfx/components/processing/__init__.py +9 -57
- lfx/components/processing/converter.py +1 -1
- lfx/components/processing/dataframe_operations.py +1 -1
- lfx/components/processing/parse_json_data.py +2 -2
- lfx/components/processing/parser.py +1 -1
- lfx/components/processing/split_text.py +1 -1
- lfx/components/qdrant/qdrant.py +1 -1
- lfx/components/redis/redis.py +1 -1
- lfx/components/twelvelabs/split_video.py +10 -0
- lfx/components/twelvelabs/video_file.py +12 -0
- lfx/components/utilities/__init__.py +43 -0
- lfx/components/{helpers → utilities}/calculator_core.py +1 -1
- lfx/components/{helpers → utilities}/current_date.py +1 -1
- lfx/components/{processing → utilities}/python_repl_core.py +1 -1
- lfx/components/vectorstores/local_db.py +9 -0
- lfx/components/youtube/youtube_transcripts.py +118 -30
- lfx/custom/custom_component/component.py +57 -1
- lfx/custom/custom_component/custom_component.py +68 -6
- lfx/graph/edge/base.py +43 -20
- lfx/graph/graph/base.py +4 -1
- lfx/graph/state/model.py +15 -2
- lfx/graph/utils.py +6 -0
- lfx/graph/vertex/base.py +4 -1
- lfx/graph/vertex/param_handler.py +10 -7
- lfx/helpers/__init__.py +12 -0
- lfx/helpers/flow.py +117 -0
- lfx/inputs/input_mixin.py +24 -1
- lfx/inputs/inputs.py +13 -1
- lfx/interface/components.py +161 -83
- lfx/log/logger.py +5 -3
- lfx/services/database/__init__.py +5 -0
- lfx/services/database/service.py +25 -0
- lfx/services/deps.py +87 -22
- lfx/services/manager.py +19 -6
- lfx/services/mcp_composer/service.py +998 -157
- lfx/services/session.py +5 -0
- lfx/services/settings/base.py +51 -7
- lfx/services/settings/constants.py +8 -0
- lfx/services/storage/local.py +76 -46
- lfx/services/storage/service.py +152 -29
- lfx/template/field/base.py +3 -0
- lfx/utils/ssrf_protection.py +384 -0
- lfx/utils/validate_cloud.py +26 -0
- {lfx_nightly-0.2.0.dev0.dist-info → lfx_nightly-0.2.0.dev26.dist-info}/METADATA +38 -22
- {lfx_nightly-0.2.0.dev0.dist-info → lfx_nightly-0.2.0.dev26.dist-info}/RECORD +182 -150
- {lfx_nightly-0.2.0.dev0.dist-info → lfx_nightly-0.2.0.dev26.dist-info}/WHEEL +1 -1
- lfx/components/agents/altk_agent.py +0 -366
- lfx/components/agents/cuga_agent.py +0 -1013
- lfx/components/docling/docling_remote_vlm.py +0 -284
- lfx/components/logic/run_flow.py +0 -71
- lfx/components/models/embedding_model.py +0 -195
- lfx/components/models/language_model.py +0 -144
- /lfx/components/{data → data_source}/mock_data.py +0 -0
- /lfx/components/{knowledge_bases → files_and_knowledge}/ingestion.py +0 -0
- /lfx/components/{logic → flow_controls}/data_conditional_router.py +0 -0
- /lfx/components/{logic → flow_controls}/flow_tool.py +0 -0
- /lfx/components/{logic → flow_controls}/listen.py +0 -0
- /lfx/components/{logic → flow_controls}/notify.py +0 -0
- /lfx/components/{logic → flow_controls}/pass_message.py +0 -0
- /lfx/components/{logic → flow_controls}/sub_flow.py +0 -0
- /lfx/components/{processing → models_and_agents}/prompt.py +0 -0
- /lfx/components/{helpers → processing}/create_list.py +0 -0
- /lfx/components/{helpers → processing}/output_parser.py +0 -0
- /lfx/components/{helpers → processing}/store_message.py +0 -0
- /lfx/components/{helpers → utilities}/id_generator.py +0 -0
- {lfx_nightly-0.2.0.dev0.dist-info → lfx_nightly-0.2.0.dev26.dist-info}/entry_points.txt +0 -0

lfx/components/files_and_knowledge/__init__.py

@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from lfx.components._importing import import_mod
+
+if TYPE_CHECKING:
+    from lfx.components.files_and_knowledge.directory import DirectoryComponent
+    from lfx.components.files_and_knowledge.file import FileComponent
+    from lfx.components.files_and_knowledge.ingestion import KnowledgeIngestionComponent
+    from lfx.components.files_and_knowledge.retrieval import KnowledgeRetrievalComponent
+    from lfx.components.files_and_knowledge.save_file import SaveToFileComponent
+
+
+_dynamic_imports = {
+    "DirectoryComponent": "directory",
+    "FileComponent": "file",
+    "KnowledgeIngestionComponent": "ingestion",
+    "KnowledgeRetrievalComponent": "retrieval",
+    "SaveToFileComponent": "save_file",
+}
+
+__all__ = [
+    "DirectoryComponent",
+    "FileComponent",
+    "KnowledgeIngestionComponent",
+    "KnowledgeRetrievalComponent",
+    "SaveToFileComponent",
+]
+
+
+def __getattr__(attr_name: str) -> Any:
+    """Lazily import files and knowledge components on attribute access."""
+    if attr_name not in _dynamic_imports:
+        msg = f"module '{__name__}' has no attribute '{attr_name}'"
+        raise AttributeError(msg)
+    try:
+        result = import_mod(attr_name, _dynamic_imports[attr_name], __spec__.parent)
+    except (ModuleNotFoundError, ImportError, AttributeError) as e:
+        msg = f"Could not import '{attr_name}' from '{__name__}': {e}"
+        raise AttributeError(msg) from e
+    globals()[attr_name] = result
+    return result
+
+
+def __dir__() -> list[str]:
+    return list(__all__)
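
The new package `__init__.py` above uses the module-level `__getattr__` hook from PEP 562 so each component class is imported only on first attribute access. A minimal sketch of the same pattern, assuming lfx's `import_mod` helper amounts to "import the submodule, return the named attribute" (stdlib `importlib` is used here in its place):

```python
# lazy_pkg/__init__.py - sketch of the PEP 562 lazy-import pattern, assuming
# import_mod amounts to "import the submodule, return the named attribute".
from importlib import import_module
from typing import Any

_dynamic_imports = {"FileComponent": "file"}  # public name -> defining submodule
__all__ = ["FileComponent"]


def __getattr__(attr_name: str) -> Any:
    """Import the defining submodule only on first attribute access."""
    if attr_name not in _dynamic_imports:
        msg = f"module '{__name__}' has no attribute '{attr_name}'"
        raise AttributeError(msg)
    module = import_module(f".{_dynamic_imports[attr_name]}", __package__)
    result = getattr(module, attr_name)
    globals()[attr_name] = result  # cache so __getattr__ runs once per name
    return result
```

Importing the package stays cheap: only submodules whose names are actually accessed get loaded, and the `TYPE_CHECKING` block keeps static analysis working without paying the import cost at runtime.
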
lfx/components/files_and_knowledge/directory.py

@@ -9,7 +9,7 @@ from lfx.template.field.base import Output
 class DirectoryComponent(Component):
     display_name = "Directory"
     description = "Recursively load files from a directory."
-    documentation: str = "https://docs.langflow.org/
+    documentation: str = "https://docs.langflow.org/directory"
     icon = "folder"
     name = "Directory"
 
lfx/components/files_and_knowledge/file.py

@@ -10,34 +10,44 @@ Notes:
 
 from __future__ import annotations
 
+import contextlib
 import json
 import subprocess
 import sys
 import textwrap
 from copy import deepcopy
+from pathlib import Path
+from tempfile import NamedTemporaryFile
 from typing import Any
 
 from lfx.base.data.base_file import BaseFileComponent
+from lfx.base.data.storage_utils import parse_storage_path
 from lfx.base.data.utils import TEXT_FILE_TYPES, parallel_load_data, parse_text_file_to_data
 from lfx.inputs.inputs import DropdownInput, MessageTextInput, StrInput
 from lfx.io import BoolInput, FileInput, IntInput, Output
 from lfx.schema.data import Data
 from lfx.schema.dataframe import DataFrame  # noqa: TC001
 from lfx.schema.message import Message
+from lfx.services.deps import get_settings_service, get_storage_service
+from lfx.utils.async_helpers import run_until_complete
 
 
 class FileComponent(BaseFileComponent):
     """File component with optional Docling processing (isolated in a subprocess)."""
 
     display_name = "Read File"
-    description
-
+    # description is now a dynamic property - see get_tool_description()
+    _base_description = "Loads content from one or more files."
+    documentation: str = "https://docs.langflow.org/read-file"
     icon = "file-text"
     name = "File"
+    add_tool_output = True  # Enable tool mode toggle without requiring tool_mode inputs
 
-    #
-
-
+    # Extensions that can be processed without Docling (using standard text parsing)
+    TEXT_EXTENSIONS = TEXT_FILE_TYPES
+
+    # Extensions that require Docling for processing (images, advanced office formats, etc.)
+    DOCLING_ONLY_EXTENSIONS = [
         "adoc",
         "asciidoc",
         "asc",
@@ -61,6 +71,12 @@ class FileComponent(BaseFileComponent):
         "webp",
     ]
 
+    # Docling-supported/compatible extensions; TEXT_FILE_TYPES are supported by the base loader.
+    VALID_EXTENSIONS = [
+        *TEXT_EXTENSIONS,
+        *DOCLING_ONLY_EXTENSIONS,
+    ]
+
     # Fixed export settings used when markdown export is requested.
     EXPORT_FORMAT = "Markdown"
     IMAGE_MODE = "placeholder"
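
The two hunks above split the accepted extensions into tiers: `TEXT_EXTENSIONS` go through the standard text parser, `DOCLING_ONLY_EXTENSIONS` require the Advanced Parser, and `VALID_EXTENSIONS` is their union. A hedged sketch of the routing this enables; the helper name is illustrative and the extension lists are abbreviated stand-ins, while the validation behavior mirrors the check added later in `process_files`:

```python
from pathlib import Path

# Abbreviated stand-ins for the real tier lists.
TEXT_EXTENSIONS = ["txt", "md", "csv", "json"]
DOCLING_ONLY_EXTENSIONS = ["adoc", "asciidoc", "asc", "webp"]
VALID_EXTENSIONS = [*TEXT_EXTENSIONS, *DOCLING_ONLY_EXTENSIONS]


def route_file(path: str, *, advanced_mode: bool) -> str:
    """Decide which parser a file goes to; mirrors the validation in process_files."""
    ext = Path(path).suffix[1:].lower()
    if ext in TEXT_EXTENSIONS:
        return "standard-text-parser"
    if ext in DOCLING_ONLY_EXTENSIONS:
        if not advanced_mode:
            msg = f"'.{ext}' requires Advanced Parser mode."
            raise ValueError(msg)
        return "docling-subprocess"
    msg = f"Unsupported extension: .{ext}"
    raise ValueError(msg)
```
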
@@ -70,10 +86,24 @@ class FileComponent(BaseFileComponent):
     for input_item in _base_inputs:
         if isinstance(input_item, FileInput) and input_item.name == "path":
             input_item.real_time_refresh = True
+            input_item.tool_mode = False  # Disable tool mode for file upload input
+            input_item.required = False  # Make it optional so it doesn't error in tool mode
             break
 
     inputs = [
         *_base_inputs,
+        StrInput(
+            name="file_path_str",
+            display_name="File Path",
+            info=(
+                "Path to the file to read. Used when component is called as a tool. "
+                "If not provided, will use the uploaded file from 'path' input."
+            ),
+            show=False,
+            advanced=True,
+            tool_mode=True,  # Required for Toolset toggle, but _get_tools() ignores this parameter
+            required=False,
+        ),
         BoolInput(
             name="advanced_mode",
             display_name="Advanced Parser",
@@ -152,9 +182,87 @@ class FileComponent(BaseFileComponent):
     ]
 
     outputs = [
-        Output(display_name="Raw Content", name="message", method="load_files_message"),
+        Output(display_name="Raw Content", name="message", method="load_files_message", tool_mode=True),
     ]
 
+    # ------------------------------ Tool description with file names --------------
+
+    def get_tool_description(self) -> str:
+        """Return a dynamic description that includes the names of uploaded files.
+
+        This helps the Agent understand which files are available to read.
+        """
+        base_description = "Loads and returns the content from uploaded files."
+
+        # Get the list of uploaded file paths
+        file_paths = getattr(self, "path", None)
+        if not file_paths:
+            return base_description
+
+        # Ensure it's a list
+        if not isinstance(file_paths, list):
+            file_paths = [file_paths]
+
+        # Extract just the file names from the paths
+        file_names = []
+        for fp in file_paths:
+            if fp:
+                name = Path(fp).name
+                file_names.append(name)
+
+        if file_names:
+            files_str = ", ".join(file_names)
+            return f"{base_description} Available files: {files_str}. Call this tool to read these files."
+
+        return base_description
+
+    @property
+    def description(self) -> str:
+        """Dynamic description property that includes uploaded file names."""
+        return self.get_tool_description()
+
+    async def _get_tools(self) -> list:
+        """Override to create a tool without parameters.
+
+        The Read File component should use the files already uploaded via UI,
+        not accept file paths from the Agent (which wouldn't know the internal paths).
+        """
+        from langchain_core.tools import StructuredTool
+        from pydantic import BaseModel
+
+        # Empty schema - no parameters needed
+        class EmptySchema(BaseModel):
+            """No parameters required - uses pre-uploaded files."""
+
+        async def read_files_tool() -> str:
+            """Read the content of uploaded files."""
+            try:
+                result = self.load_files_message()
+                if hasattr(result, "get_text"):
+                    return result.get_text()
+                if hasattr(result, "text"):
+                    return result.text
+                return str(result)
+            except (FileNotFoundError, ValueError, OSError, RuntimeError) as e:
+                return f"Error reading files: {e}"
+
+        description = self.get_tool_description()
+
+        tool = StructuredTool(
+            name="load_files_message",
+            description=description,
+            coroutine=read_files_tool,
+            args_schema=EmptySchema,
+            handle_tool_error=True,
+            tags=["load_files_message"],
+            metadata={
+                "display_name": "Read File",
+                "display_description": description,
+            },
+        )
+
+        return [tool]
+
     # ------------------------------ UI helpers --------------------------------------
 
     def _path_value(self, template: dict) -> list[str]:
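
The `_get_tools` override above exposes the component to agents as a LangChain `StructuredTool` whose `args_schema` has no fields, so the agent invokes it with no arguments and the tool reads whatever was uploaded in the UI. A self-contained sketch of that construction, with a stub in place of `self.load_files_message()`:

```python
import asyncio

from langchain_core.tools import StructuredTool
from pydantic import BaseModel


class EmptySchema(BaseModel):
    """No parameters - the tool relies on state captured in its closure."""


async def read_files_tool() -> str:
    """Stand-in for self.load_files_message(); returns the file contents."""
    return "contents of the pre-uploaded files"


tool = StructuredTool(
    name="load_files_message",
    description="Loads and returns the content from uploaded files.",
    coroutine=read_files_tool,
    args_schema=EmptySchema,
    handle_tool_error=True,
)

# An agent (or a test) invokes the tool with an empty argument dict:
print(asyncio.run(tool.ainvoke({})))
```

Baking the available file names into the tool description (via `get_tool_description`) is what lets the model decide when the tool is worth calling, since the call itself carries no arguments.
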
@@ -213,39 +321,84 @@ class FileComponent(BaseFileComponent):
             file_path = paths[0] if field_name == "path" else frontend_node["template"]["path"]["file_path"][0]
             if file_path.endswith((".csv", ".xlsx", ".parquet")):
                 frontend_node["outputs"].append(
-                    Output(
+                    Output(
+                        display_name="Structured Content",
+                        name="dataframe",
+                        method="load_files_structured",
+                        tool_mode=True,
+                    ),
                 )
             elif file_path.endswith(".json"):
                 frontend_node["outputs"].append(
-                    Output(display_name="Structured Content", name="json", method="load_files_json"),
+                    Output(display_name="Structured Content", name="json", method="load_files_json", tool_mode=True),
                 )
 
             advanced_mode = frontend_node.get("template", {}).get("advanced_mode", {}).get("value", False)
             if advanced_mode:
                 frontend_node["outputs"].append(
-                    Output(
+                    Output(
+                        display_name="Structured Output",
+                        name="advanced_dataframe",
+                        method="load_files_dataframe",
+                        tool_mode=True,
+                    ),
                 )
                 frontend_node["outputs"].append(
-                    Output(
+                    Output(
+                        display_name="Markdown", name="advanced_markdown", method="load_files_markdown", tool_mode=True
+                    ),
                 )
                 frontend_node["outputs"].append(
-                    Output(display_name="File Path", name="path", method="load_files_path"),
+                    Output(display_name="File Path", name="path", method="load_files_path", tool_mode=True),
                 )
             else:
                 frontend_node["outputs"].append(
-                    Output(display_name="Raw Content", name="message", method="load_files_message"),
+                    Output(display_name="Raw Content", name="message", method="load_files_message", tool_mode=True),
                 )
                 frontend_node["outputs"].append(
-                    Output(display_name="File Path", name="path", method="load_files_path"),
+                    Output(display_name="File Path", name="path", method="load_files_path", tool_mode=True),
                 )
         else:
             # Multiple files => DataFrame output; advanced parser disabled
-            frontend_node["outputs"].append(
+            frontend_node["outputs"].append(
+                Output(display_name="Files", name="dataframe", method="load_files", tool_mode=True)
+            )
 
         return frontend_node
 
     # ------------------------------ Core processing ----------------------------------
 
+    def _validate_and_resolve_paths(self) -> list[BaseFileComponent.BaseFile]:
+        """Override to handle file_path_str input from tool mode.
+
+        When called as a tool, the file_path_str parameter can be set.
+        If not provided, it will fall back to using the path FileInput (uploaded file).
+        Priority:
+        1. file_path_str (if provided by the tool call)
+        2. path (uploaded file from UI)
+        """
+        # Check if file_path_str is provided (from tool mode)
+        file_path_str = getattr(self, "file_path_str", None)
+        if file_path_str:
+            # Use the string path from tool mode
+            from pathlib import Path
+
+            from lfx.schema.data import Data
+
+            resolved_path = Path(self.resolve_path(file_path_str))
+            if not resolved_path.exists():
+                msg = f"File or directory not found: {file_path_str}"
+                self.log(msg)
+                if not self.silent_errors:
+                    raise ValueError(msg)
+                return []
+
+            data_obj = Data(data={self.SERVER_FILE_PATH_FIELDNAME: str(resolved_path)})
+            return [BaseFileComponent.BaseFile(data_obj, resolved_path, delete_after_processing=False)]
+
+        # Otherwise use the default implementation (uses path FileInput)
+        return super()._validate_and_resolve_paths()
+
     def _is_docling_compatible(self, file_path: str) -> bool:
        """Lightweight extension gate for Docling-compatible types."""
        docling_exts = (
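
`_validate_and_resolve_paths` above gives the tool-supplied `file_path_str` priority over the UI upload and falls back to the parent implementation otherwise. A compact sketch of that precedence, with a simplified resolver and return type standing in for the component's internals:

```python
from pathlib import Path


def resolve_input_paths(file_path_str: str | None, uploaded: list[str]) -> list[Path]:
    """Illustrative precedence: 1) tool-supplied path string, 2) UI upload."""
    if file_path_str:
        # Stand-in for self.resolve_path(); the real code also honors silent_errors.
        path = Path(file_path_str).expanduser().resolve()
        if not path.exists():
            msg = f"File or directory not found: {file_path_str}"
            raise ValueError(msg)
        return [path]
    return [Path(p) for p in uploaded]  # default: files uploaded via the UI


# The tool call wins over the upload when both are present:
# resolve_input_paths("/tmp/report.txt", uploaded=["flow-id/upload.pdf"])
```
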
@@ -282,17 +435,76 @@ class FileComponent(BaseFileComponent):
         )
         return file_path.lower().endswith(docling_exts)
 
+    async def _get_local_file_for_docling(self, file_path: str) -> tuple[str, bool]:
+        """Get a local file path for Docling processing, downloading from S3 if needed.
+
+        Args:
+            file_path: Either a local path or S3 key (format "flow_id/filename")
+
+        Returns:
+            tuple[str, bool]: (local_path, should_delete) where should_delete indicates
+                if this is a temporary file that should be cleaned up
+        """
+        settings = get_settings_service().settings
+        if settings.storage_type == "local":
+            return file_path, False
+
+        # S3 storage - download to temp file
+        parsed = parse_storage_path(file_path)
+        if not parsed:
+            msg = f"Invalid S3 path format: {file_path}. Expected 'flow_id/filename'"
+            raise ValueError(msg)
+
+        storage_service = get_storage_service()
+        flow_id, filename = parsed
+
+        # Get file content from S3
+        content = await storage_service.get_file(flow_id, filename)
+
+        suffix = Path(filename).suffix
+        with NamedTemporaryFile(mode="wb", suffix=suffix, delete=False) as tmp_file:
+            tmp_file.write(content)
+            temp_path = tmp_file.name
+
+        return temp_path, True
+
     def _process_docling_in_subprocess(self, file_path: str) -> Data | None:
         """Run Docling in a separate OS process and map the result to a Data object.
 
         We avoid multiprocessing pickling by launching `python -c "<script>"` and
         passing JSON config via stdin. The child prints a JSON result to stdout.
+
+        For S3 storage, the file is downloaded to a temp file first.
         """
         if not file_path:
             return None
 
+        settings = get_settings_service().settings
+        if settings.storage_type == "s3":
+            local_path, should_delete = run_until_complete(self._get_local_file_for_docling(file_path))
+        else:
+            local_path = file_path
+            should_delete = False
+
+        try:
+            return self._process_docling_subprocess_impl(local_path, file_path)
+        finally:
+            # Clean up temp file if we created one
+            if should_delete:
+                with contextlib.suppress(Exception):
+                    Path(local_path).unlink()  # Ignore cleanup errors
+
+    def _process_docling_subprocess_impl(self, local_file_path: str, original_file_path: str) -> Data | None:
+        """Implementation of Docling subprocess processing.
+
+        Args:
+            local_file_path: Path to local file to process
+            original_file_path: Original file path to include in metadata
+        Returns:
+            Data object with processed content
+        """
         args: dict[str, Any] = {
-            "file_path":
+            "file_path": local_file_path,
             "markdown": bool(self.markdown),
             "image_mode": str(self.IMAGE_MODE),
             "md_image_placeholder": str(self.md_image_placeholder),
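
`_get_local_file_for_docling` bridges remote storage and the subprocess, which can only read the local filesystem: the S3 object is written to a `NamedTemporaryFile(delete=False)` and the caller removes it in a `finally` block. A generic sketch of that download-use-cleanup pattern, with a stand-in for the storage fetch:

```python
import contextlib
from pathlib import Path
from tempfile import NamedTemporaryFile


def fetch_remote_bytes(key: str) -> bytes:
    """Stand-in for `await storage_service.get_file(flow_id, filename)`."""
    return b"example file contents"


def process_with_local_copy(key: str) -> str:
    """Download a remote object to a temp file, use it, always clean up."""
    suffix = Path(key).suffix  # keep the extension so parsers can sniff the type
    with NamedTemporaryFile(mode="wb", suffix=suffix, delete=False) as tmp:
        tmp.write(fetch_remote_bytes(key))
        local_path = tmp.name
    try:
        return f"processed {local_path}"  # stand-in for the subprocess call
    finally:
        with contextlib.suppress(OSError):
            Path(local_path).unlink()  # ignore cleanup errors, as the diff does
```

`delete=False` matters here: the file must outlive the `with` block so the subprocess can open it by path, which is why cleanup is deferred to the caller's `finally`.
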
@@ -303,7 +515,7 @@ class FileComponent(BaseFileComponent):
             ),
         }
 
-        self.log(f"Starting Docling subprocess for file: {
+        self.log(f"Starting Docling subprocess for file: {local_file_path}")
         self.log(args)
 
         # Child script for isolating the docling processing
@@ -496,14 +708,17 @@ class FileComponent(BaseFileComponent):
 
         if not proc.stdout:
             err_msg = proc.stderr.decode("utf-8", errors="replace") or "no output from child process"
-            return Data(data={"error": f"Docling subprocess error: {err_msg}", "file_path":
+            return Data(data={"error": f"Docling subprocess error: {err_msg}", "file_path": original_file_path})
 
         try:
             result = json.loads(proc.stdout.decode("utf-8"))
         except Exception as e:  # noqa: BLE001
             err_msg = proc.stderr.decode("utf-8", errors="replace")
             return Data(
-                data={
+                data={
+                    "error": f"Invalid JSON from Docling subprocess: {e}. stderr={err_msg}",
+                    "file_path": original_file_path,
+                },
             )
 
         if not result.get("ok"):
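
These error paths complete the parent side of the JSON-over-stdio protocol described in the docstring: config goes to the child's stdin, one JSON object comes back on stdout, and empty or unparseable output is mapped to an error payload rather than an exception. A self-contained sketch of that protocol with a trivial inline child:

```python
import json
import subprocess
import sys
import textwrap

# Trivial child: read JSON config from stdin, print one JSON object to stdout.
child_script = textwrap.dedent(
    """
    import json, sys
    cfg = json.load(sys.stdin)
    print(json.dumps({"ok": True, "echo": cfg["file_path"]}))
    """
)

proc = subprocess.run(
    [sys.executable, "-c", child_script],
    input=json.dumps({"file_path": "example.pdf"}).encode("utf-8"),
    capture_output=True,
    check=False,
)

if not proc.stdout:
    err = proc.stderr.decode("utf-8", errors="replace") or "no output from child process"
    result = {"error": f"subprocess error: {err}"}
else:
    try:
        result = json.loads(proc.stdout.decode("utf-8"))
    except ValueError as e:
        result = {"error": f"invalid JSON from subprocess: {e}"}
print(result)
```
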
@@ -533,6 +748,18 @@ class FileComponent(BaseFileComponent):
             msg = "No files to process."
             raise ValueError(msg)
 
+        # Validate that files requiring Docling are only processed when advanced mode is enabled
+        if not self.advanced_mode:
+            for file in file_list:
+                extension = file.path.suffix[1:].lower()
+                if extension in self.DOCLING_ONLY_EXTENSIONS:
+                    msg = (
+                        f"File '{file.path.name}' has extension '.{extension}' which requires "
+                        f"Advanced Parser mode. Please enable 'Advanced Parser' to process this file."
+                    )
+                    self.log(msg)
+                    raise ValueError(msg)
+
         def process_file_standard(file_path: str, *, silent_errors: bool = False) -> Data | None:
             try:
                 return parse_text_file_to_data(file_path, silent_errors=silent_errors)
@@ -577,6 +804,7 @@ class FileComponent(BaseFileComponent):
 
         # Standard multi-file (or single non-advanced) path
         concurrency = 1 if not self.use_multithreading else max(1, self.concurrency_multithreading)
+
         file_paths = [str(f.path) for f in file_list]
         self.log(f"Starting parallel processing of {len(file_paths)} files with concurrency: {concurrency}.")
         my_data = parallel_load_data(
lfx/components/files_and_knowledge/retrieval.py

@@ -235,8 +235,8 @@ class KnowledgeRetrievalComponent(Component):
 
         # Only proceed if we have valid document IDs
         if doc_ids:
-            # Access underlying
-            collection = chroma.
+            # Access underlying collection to get embeddings
+            collection = chroma._collection  # noqa: SLF001
             embeddings_result = collection.get(where={"_id": {"$in": doc_ids}}, include=["metadatas", "embeddings"])
 
             # Create a mapping from document ID to embedding