lfx-nightly 0.2.0.dev26__py3-none-any.whl → 0.2.0.dev41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. lfx/_assets/component_index.json +1 -1
  2. lfx/base/agents/agent.py +8 -3
  3. lfx/base/agents/altk_base_agent.py +16 -3
  4. lfx/base/data/base_file.py +14 -4
  5. lfx/base/data/docling_utils.py +61 -10
  6. lfx/base/data/storage_utils.py +109 -0
  7. lfx/base/mcp/util.py +2 -2
  8. lfx/base/models/anthropic_constants.py +21 -12
  9. lfx/cli/commands.py +3 -1
  10. lfx/components/docling/chunk_docling_document.py +3 -1
  11. lfx/components/docling/export_docling_document.py +3 -1
  12. lfx/components/files_and_knowledge/file.py +59 -7
  13. lfx/components/files_and_knowledge/save_file.py +79 -12
  14. lfx/components/ibm/watsonx.py +7 -1
  15. lfx/components/input_output/chat_output.py +7 -1
  16. lfx/components/llm_operations/batch_run.py +16 -7
  17. lfx/components/models_and_agents/agent.py +4 -2
  18. lfx/components/models_and_agents/embedding_model.py +6 -76
  19. lfx/components/ollama/ollama.py +9 -4
  20. lfx/components/processing/__init__.py +0 -3
  21. lfx/custom/directory_reader/directory_reader.py +5 -2
  22. lfx/graph/graph/base.py +1 -4
  23. lfx/graph/vertex/base.py +1 -4
  24. lfx/schema/image.py +2 -12
  25. lfx/services/interfaces.py +5 -0
  26. lfx/services/manager.py +5 -4
  27. lfx/services/mcp_composer/service.py +38 -12
  28. lfx/services/settings/auth.py +18 -11
  29. lfx/services/settings/base.py +5 -23
  30. lfx/services/storage/local.py +32 -0
  31. lfx/services/storage/service.py +19 -0
  32. lfx/utils/image.py +29 -11
  33. {lfx_nightly-0.2.0.dev26.dist-info → lfx_nightly-0.2.0.dev41.dist-info}/METADATA +1 -1
  34. {lfx_nightly-0.2.0.dev26.dist-info → lfx_nightly-0.2.0.dev41.dist-info}/RECORD +36 -39
  35. lfx/base/embeddings/embeddings_class.py +0 -113
  36. lfx/components/elastic/opensearch_multimodal.py +0 -1575
  37. lfx/components/processing/dataframe_to_toolset.py +0 -259
  38. {lfx_nightly-0.2.0.dev26.dist-info → lfx_nightly-0.2.0.dev41.dist-info}/WHEEL +0 -0
  39. {lfx_nightly-0.2.0.dev26.dist-info → lfx_nightly-0.2.0.dev41.dist-info}/entry_points.txt +0 -0
lfx/base/agents/agent.py CHANGED
@@ -181,7 +181,11 @@ class LCAgentComponent(Component):
          else:
              input_dict = {"input": self.input_value}

-         if hasattr(self, "system_prompt"):
+         # Ensure input_dict is initialized
+         if "input" not in input_dict:
+             input_dict = {"input": self.input_value}
+
+         if hasattr(self, "system_prompt") and self.system_prompt and self.system_prompt.strip():
              input_dict["system_prompt"] = self.system_prompt

          if hasattr(self, "chat_history") and self.chat_history:
@@ -196,8 +200,9 @@ class LCAgentComponent(Component):
          # Note: Agent input must be a string, so we extract text and move images to chat_history
          if lc_message is not None and hasattr(lc_message, "content") and isinstance(lc_message.content, list):
              # Extract images and text from the text content items
-             image_dicts = [item for item in lc_message.content if item.get("type") == "image"]
-             text_content = [item for item in lc_message.content if item.get("type") != "image"]
+             # Support both "image" (legacy) and "image_url" (standard) types
+             image_dicts = [item for item in lc_message.content if item.get("type") in ("image", "image_url")]
+             text_content = [item for item in lc_message.content if item.get("type") not in ("image", "image_url")]

              text_strings = [
                  item.get("text", "")
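The same "image"/"image_url" split now appears in both agent components (see the altk_base_agent.py hunk below). A minimal standalone sketch of the behavior, using a hypothetical multimodal content list:

```python
# Hypothetical LangChain-style multimodal content; not taken from the package.
content = [
    {"type": "text", "text": "What is in this picture?"},
    {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBORw0KGgo="}},
]

# Both the legacy "image" type and the standard "image_url" type are routed
# into chat history; only non-image items stay in the string agent input.
image_dicts = [item for item in content if item.get("type") in ("image", "image_url")]
text_content = [item for item in content if item.get("type") not in ("image", "image_url")]

assert len(image_dicts) == 1
assert text_content == [{"type": "text", "text": "What is in this picture?"}]
```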
lfx/base/agents/altk_base_agent.py CHANGED
@@ -319,9 +319,9 @@ class ALTKBaseAgentComponent(AgentComponent):
              input_dict["chat_history"] = data_to_messages([m.to_data() for m in self.chat_history])
          if hasattr(lc_message, "content") and isinstance(lc_message.content, list):
              # ! Because the input has to be a string, we must pass the images in the chat_history
-
-             image_dicts = [item for item in lc_message.content if item.get("type") == "image"]
-             lc_message.content = [item for item in lc_message.content if item.get("type") != "image"]
+             # Support both "image" (legacy) and "image_url" (standard) types
+             image_dicts = [item for item in lc_message.content if item.get("type") in ("image", "image_url")]
+             lc_message.content = [item for item in lc_message.content if item.get("type") not in ("image", "image_url")]

              if "chat_history" not in input_dict:
                  input_dict["chat_history"] = []
@@ -330,6 +330,19 @@ class ALTKBaseAgentComponent(AgentComponent):
              else:
                  input_dict["chat_history"] = [HumanMessage(content=[image_dict]) for image_dict in image_dicts]
          input_dict["input"] = input_text
+
+         # Copied from agent.py
+         # Final safety check: ensure input is never empty (prevents Anthropic API errors)
+         current_input = input_dict.get("input", "")
+         if isinstance(current_input, list):
+             current_input = " ".join(map(str, current_input))
+         elif not isinstance(current_input, str):
+             current_input = str(current_input)
+         if not current_input.strip():
+             input_dict["input"] = "Continue the conversation."
+         else:
+             input_dict["input"] = current_input
+
          if hasattr(self, "graph"):
              session_id = self.graph.session_id
          elif hasattr(self, "_session_id"):
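The safety check duplicated into the ALTK agent above reduces to a small normalization routine; a standalone sketch (the function name is illustrative, not from the package):

```python
def normalize_agent_input(value: object) -> str:
    """Coerce any agent input to a non-empty string, mirroring the check above."""
    if isinstance(value, list):
        value = " ".join(map(str, value))
    elif not isinstance(value, str):
        value = str(value)
    # Anthropic rejects empty input messages, so substitute a neutral prompt.
    return value if value.strip() else "Continue the conversation."


assert normalize_agent_input("") == "Continue the conversation."
assert normalize_agent_input(["look at", 2, "images"]) == "look at 2 images"
```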
lfx/base/data/base_file.py CHANGED
@@ -260,8 +260,6 @@ class BaseFileComponent(Component, ABC):
          filename = file_path_obj.name

          settings = get_settings_service().settings
-
-         # Get file size - use storage service for S3, filesystem for local
          if settings.storage_type == "s3":
              try:
                  file_size = get_file_size(file_path)
@@ -618,9 +616,21 @@ class BaseFileComponent(Component, ABC):
                  BaseFileComponent.BaseFile(data, Path(path_str), delete_after_processing=delete_after_processing)
              )
          else:
-             resolved_path = Path(self.resolve_path(path_str))
+             # Check if path looks like a storage path (flow_id/filename format)
+             # If so, use get_full_path to resolve it to the actual storage location
+             if "/" in path_str and not Path(path_str).is_absolute():
+                 try:
+                     resolved_path = Path(self.get_full_path(path_str))
+                     self.log(f"Resolved storage path '{path_str}' to '{resolved_path}'")
+                 except (ValueError, AttributeError) as e:
+                     # Fallback to resolve_path if get_full_path fails
+                     self.log(f"get_full_path failed for '{path_str}': {e}, falling back to resolve_path")
+                     resolved_path = Path(self.resolve_path(path_str))
+             else:
+                 resolved_path = Path(self.resolve_path(path_str))
+
              if not resolved_path.exists():
-                 msg = f"File or directory not found: {path}"
+                 msg = f"File not found: '{path}' (resolved to: '{resolved_path}'). Please upload the file again."
                  self.log(msg)
                  if not self.silent_errors:
                      raise ValueError(msg)
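The new resolution order hinges on one heuristic: a relative path containing a separator is treated as a flow_id/filename storage key and resolved via get_full_path first. Extracted as a sketch:

```python
from pathlib import Path


def looks_like_storage_path(path_str: str) -> bool:
    """Mirror of the check above: relative paths with a separator are
    treated as flow-scoped storage keys rather than filesystem paths."""
    return "/" in path_str and not Path(path_str).is_absolute()


assert looks_like_storage_path("123e4567-e89b/upload.pdf")  # hypothetical flow id
assert not looks_like_storage_path("/tmp/upload.pdf")       # absolute filesystem path
assert not looks_like_storage_path("upload.pdf")            # bare filename
```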
lfx/base/data/docling_utils.py CHANGED
@@ -25,21 +25,72 @@ class DoclingDependencyError(Exception):
          super().__init__(f"{dependency_name} is not correctly installed. {install_command}")


- def extract_docling_documents(data_inputs: Data | list[Data] | DataFrame, doc_key: str) -> list[DoclingDocument]:
+ def extract_docling_documents(
+     data_inputs: Data | list[Data] | DataFrame, doc_key: str
+ ) -> tuple[list[DoclingDocument], str | None]:
+     """Extract DoclingDocument objects from data inputs.
+
+     Args:
+         data_inputs: The data inputs containing DoclingDocument objects
+         doc_key: The key/column name to look for DoclingDocument objects
+
+     Returns:
+         A tuple of (documents, warning_message) where warning_message is None if no warning
+
+     Raises:
+         TypeError: If the data cannot be extracted or is invalid
+     """
      documents: list[DoclingDocument] = []
+     warning_message: str | None = None
+
      if isinstance(data_inputs, DataFrame):
          if not len(data_inputs):
              msg = "DataFrame is empty"
              raise TypeError(msg)

-         if doc_key not in data_inputs.columns:
-             msg = f"Column '{doc_key}' not found in DataFrame"
-             raise TypeError(msg)
-         try:
-             documents = data_inputs[doc_key].tolist()
-         except Exception as e:
-             msg = f"Error extracting DoclingDocument from DataFrame: {e}"
-             raise TypeError(msg) from e
+         # Primary: Check for exact column name match
+         if doc_key in data_inputs.columns:
+             try:
+                 documents = data_inputs[doc_key].tolist()
+             except Exception as e:
+                 msg = f"Error extracting DoclingDocument from DataFrame column '{doc_key}': {e}"
+                 raise TypeError(msg) from e
+         else:
+             # Fallback: Search all columns for DoclingDocument objects
+             found_column = None
+             for col in data_inputs.columns:
+                 try:
+                     # Check if this column contains DoclingDocument objects
+                     sample = data_inputs[col].dropna().iloc[0] if len(data_inputs[col].dropna()) > 0 else None
+                     if sample is not None and isinstance(sample, DoclingDocument):
+                         found_column = col
+                         break
+                 except (IndexError, AttributeError):
+                     continue
+
+             if found_column:
+                 warning_message = (
+                     f"Column '{doc_key}' not found, but found DoclingDocument objects in column '{found_column}'. "
+                     f"Using '{found_column}' instead. Consider updating the 'Doc Key' parameter."
+                 )
+                 logger.warning(warning_message)
+                 try:
+                     documents = data_inputs[found_column].tolist()
+                 except Exception as e:
+                     msg = f"Error extracting DoclingDocument from DataFrame column '{found_column}': {e}"
+                     raise TypeError(msg) from e
+             else:
+                 # Provide helpful error message
+                 available_columns = list(data_inputs.columns)
+                 msg = (
+                     f"Column '{doc_key}' not found in DataFrame. "
+                     f"Available columns: {available_columns}. "
+                     f"\n\nPossible solutions:\n"
+                     f"1. Use the 'Data' output from Docling component instead of 'DataFrame' output\n"
+                     f"2. Update the 'Doc Key' parameter to match one of the available columns\n"
+                     f"3. If using VLM pipeline, try using the standard pipeline"
+                 )
+                 raise TypeError(msg)
      else:
          if not data_inputs:
              msg = "No data inputs provided"
@@ -69,7 +120,7 @@ def extract_docling_documents(data_inputs: Data | list[Data] | DataFrame, doc_ke
          except AttributeError as e:
              msg = f"Invalid input type in collection: {e}"
              raise TypeError(msg) from e
-     return documents
+     return documents, warning_message


  def _unwrap_secrets(obj):
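Every caller of extract_docling_documents must now unpack a (documents, warning) tuple; the two Docling components changed below surface the warning through self.status. A minimal usage sketch, where `data_inputs` stands in for whatever Data, list[Data], or DataFrame arrives from upstream:

```python
from lfx.base.data.docling_utils import extract_docling_documents

# `data_inputs` is assumed to come from an upstream Docling component.
documents, warning = extract_docling_documents(data_inputs, doc_key="doc")
if warning is not None:
    # The doc_key column was missing but DoclingDocuments were found in
    # another column; callers report this instead of failing outright.
    print(warning)
```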
lfx/base/data/storage_utils.py CHANGED
@@ -190,3 +190,112 @@ def file_exists(file_path: str, storage_service: StorageService | None = None) -
          return False
      else:
          return True
+
+
+ # Magic bytes signatures for common image formats
+ MIN_IMAGE_HEADER_SIZE = 12  # Minimum bytes needed to detect image type
+
+ IMAGE_SIGNATURES: dict[str, list[tuple[bytes, int]]] = {
+     "jpeg": [(b"\xff\xd8\xff", 0)],
+     "jpg": [(b"\xff\xd8\xff", 0)],
+     "png": [(b"\x89PNG\r\n\x1a\n", 0)],
+     "gif": [(b"GIF87a", 0), (b"GIF89a", 0)],
+     "webp": [(b"RIFF", 0)],  # WebP starts with RIFF, then has WEBP at offset 8
+     "bmp": [(b"BM", 0)],
+     "tiff": [(b"II*\x00", 0), (b"MM\x00*", 0)],  # Little-endian and big-endian TIFF
+ }
+
+
+ def detect_image_type_from_bytes(content: bytes) -> str | None:
+     """Detect the actual image type from file content using magic bytes.
+
+     Args:
+         content: The file content bytes (at least first 12 bytes needed)
+
+     Returns:
+         str | None: The detected image type (e.g., "jpeg", "png") or None if not recognized
+     """
+     if len(content) < MIN_IMAGE_HEADER_SIZE:
+         return None
+
+     # Check WebP specifically (needs to check both RIFF and WEBP)
+     if content[:4] == b"RIFF" and content[8:12] == b"WEBP":
+         return "webp"
+
+     # Check other image signatures
+     for image_type, signatures in IMAGE_SIGNATURES.items():
+         if image_type == "webp":
+             continue  # Already handled above
+         for signature, offset in signatures:
+             if content[offset : offset + len(signature)] == signature:
+                 return image_type
+
+     return None
+
+
+ def validate_image_content_type(
+     file_path: str,
+     content: bytes | None = None,
+     storage_service: StorageService | None = None,
+     resolve_path: Callable[[str], str] | None = None,
+ ) -> tuple[bool, str | None]:
+     """Validate that an image file's content matches its declared extension.
+
+     This prevents errors like "Image does not match the provided media type image/png"
+     when a JPEG file is saved with a .png extension.
+
+     Only rejects files when we can definitively detect a mismatch. Files with
+     unrecognized content are allowed through (they may fail later, but that's
+     better than false positives blocking valid files).
+
+     Args:
+         file_path: Path to the image file
+         content: Optional pre-read file content bytes. If not provided, will read from file.
+         storage_service: Optional storage service instance for S3 files
+         resolve_path: Optional function to resolve relative paths
+
+     Returns:
+         tuple[bool, str | None]: (is_valid, error_message)
+         - (True, None) if the content matches the extension, is unrecognized, or file is not an image
+         - (False, error_message) if there's a definite mismatch
+     """
+     # Get the file extension
+     path_obj = Path(file_path)
+     extension = path_obj.suffix[1:].lower() if path_obj.suffix else ""
+
+     # Only validate image files
+     image_extensions = {"jpeg", "jpg", "png", "gif", "webp", "bmp", "tiff"}
+     if extension not in image_extensions:
+         return True, None
+
+     # Read content if not provided
+     if content is None:
+         try:
+             content = run_until_complete(read_file_bytes(file_path, storage_service, resolve_path))
+         except (FileNotFoundError, ValueError):
+             # Can't read file - let it pass, will fail later with better error
+             return True, None
+
+     # Detect actual image type
+     detected_type = detect_image_type_from_bytes(content)
+
+     # If we can't detect the type, the file is not a valid image
+     if detected_type is None:
+         return False, (
+             f"File '{path_obj.name}' has extension '.{extension}' but its content "
+             f"is not a valid image format. The file may be corrupted, empty, or not a real image."
+         )
+
+     # Normalize extensions for comparison (jpg == jpeg, tif == tiff)
+     extension_normalized = "jpeg" if extension == "jpg" else extension
+     detected_normalized = "jpeg" if detected_type == "jpg" else detected_type
+
+     if extension_normalized != detected_normalized:
+         return False, (
+             f"File '{path_obj.name}' has extension '.{extension}' but contains "
+             f"'{detected_type.upper()}' image data. This mismatch will cause API errors. "
+             f"Please rename the file with the correct extension '.{detected_type}' or "
+             f"re-save it in the correct format."
+         )
+
+     return True, None
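A quick usage sketch of the two new helpers, with header bytes built by hand; detection needs at least 12 bytes, so the headers are padded:

```python
from lfx.base.data.storage_utils import (
    detect_image_type_from_bytes,
    validate_image_content_type,
)

# A PNG signature padded past MIN_IMAGE_HEADER_SIZE is detected directly.
png_header = b"\x89PNG\r\n\x1a\n" + b"\x00" * 8
assert detect_image_type_from_bytes(png_header) == "png"

# A JPEG payload saved under a .png name is exactly the mismatch this guards against.
jpeg_header = b"\xff\xd8\xff\xe0" + b"\x00" * 12
is_valid, error = validate_image_content_type("photo.png", content=jpeg_header)
assert not is_valid
assert "JPEG" in error
```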
lfx/base/mcp/util.py CHANGED
@@ -23,6 +23,7 @@ from pydantic import BaseModel
  from lfx.log.logger import logger
  from lfx.schema.json_schema import create_input_schema_from_json_schema
  from lfx.services.deps import get_settings_service
+ from lfx.utils.async_helpers import run_until_complete

  HTTP_ERROR_STATUS_CODE = httpx_codes.BAD_REQUEST  # HTTP status code for client errors

@@ -351,8 +352,7 @@ def create_tool_func(tool_name: str, arg_schema: type[BaseModel], client) -> Cal
              _handle_tool_validation_error(e, tool_name, provided_args, arg_schema)

          try:
-             loop = asyncio.get_event_loop()
-             return loop.run_until_complete(client.run_tool(tool_name, arguments=validated.model_dump()))
+             return run_until_complete(client.run_tool(tool_name, arguments=validated.model_dump()))
          except Exception as e:
              logger.error(f"Tool '{tool_name}' execution failed: {e}")
              # Re-raise with more context
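The diff imports run_until_complete from lfx.utils.async_helpers without showing its body. As an assumption about its likely shape (not the package's actual implementation): asyncio.get_event_loop() is deprecated for this use and fails in threads without a loop, so such helpers typically branch on whether a loop is already running:

```python
import asyncio
from collections.abc import Coroutine
from typing import Any, TypeVar

T = TypeVar("T")


def run_until_complete(coro: Coroutine[Any, Any, T]) -> T:
    """Sketch of a loop-safe runner; the real helper in lfx may differ."""
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop running in this thread: asyncio.run creates and closes one.
        return asyncio.run(coro)
    # A loop is already running; the real helper presumably hands the coroutine
    # to a worker thread. This sketch only makes the failure mode explicit.
    msg = "cannot block on a coroutine inside a running event loop"
    raise RuntimeError(msg)
```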
lfx/base/models/anthropic_constants.py CHANGED
@@ -2,32 +2,41 @@ from .model_metadata import create_model_metadata

  ANTHROPIC_MODELS_DETAILED = [
      # Tool calling supported models
+     create_model_metadata(provider="Anthropic", name="claude-opus-4-5-20251101", icon="Anthropic", tool_calling=True),
+     create_model_metadata(provider="Anthropic", name="claude-haiku-4-5-20251001", icon="Anthropic", tool_calling=True),
      create_model_metadata(provider="Anthropic", name="claude-sonnet-4-5-20250929", icon="Anthropic", tool_calling=True),
      create_model_metadata(provider="Anthropic", name="claude-opus-4-1-20250805", icon="Anthropic", tool_calling=True),
      create_model_metadata(provider="Anthropic", name="claude-opus-4-20250514", icon="Anthropic", tool_calling=True),
      create_model_metadata(provider="Anthropic", name="claude-sonnet-4-20250514", icon="Anthropic", tool_calling=True),
-     create_model_metadata(provider="Anthropic", name="claude-3-7-sonnet-latest", icon="Anthropic", tool_calling=True),
-     create_model_metadata(provider="Anthropic", name="claude-3-5-sonnet-latest", icon="Anthropic", tool_calling=True),
-     create_model_metadata(provider="Anthropic", name="claude-3-5-haiku-latest", icon="Anthropic", tool_calling=True),
-     create_model_metadata(provider="Anthropic", name="claude-3-opus-latest", icon="Anthropic", tool_calling=True),
+     create_model_metadata(provider="Anthropic", name="claude-3-5-haiku-20241022", icon="Anthropic", tool_calling=True),
+     create_model_metadata(provider="Anthropic", name="claude-3-haiku-20240307", icon="Anthropic", tool_calling=True),
+     # Deprecated models
+     create_model_metadata(
+         provider="Anthropic", name="claude-3-7-sonnet-latest", icon="Anthropic", tool_calling=True, deprecated=True
+     ),
+     create_model_metadata(
+         provider="Anthropic", name="claude-3-5-sonnet-latest", icon="Anthropic", tool_calling=True, deprecated=True
+     ),
+     create_model_metadata(
+         provider="Anthropic", name="claude-3-5-haiku-latest", icon="Anthropic", tool_calling=True, deprecated=True
+     ),
+     create_model_metadata(
+         provider="Anthropic", name="claude-3-opus-latest", icon="Anthropic", tool_calling=True, deprecated=True
+     ),
      create_model_metadata(
          provider="Anthropic", name="claude-3-sonnet-20240229", icon="Anthropic", tool_calling=True, deprecated=True
      ),
-     # Tool calling unsupported models
-     create_model_metadata(provider="Anthropic", name="claude-2.1", icon="Anthropic", tool_calling=False),
-     create_model_metadata(provider="Anthropic", name="claude-2.0", icon="Anthropic", tool_calling=False),
-     # Deprecated models
      create_model_metadata(
-         provider="Anthropic", name="claude-3-5-sonnet-20240620", icon="Anthropic", tool_calling=True, deprecated=True
+         provider="Anthropic", name="claude-2.1", icon="Anthropic", tool_calling=False, deprecated=True
      ),
      create_model_metadata(
-         provider="Anthropic", name="claude-3-5-sonnet-20241022", icon="Anthropic", tool_calling=True, deprecated=True
+         provider="Anthropic", name="claude-2.0", icon="Anthropic", tool_calling=False, deprecated=True
      ),
      create_model_metadata(
-         provider="Anthropic", name="claude-3-5-haiku-20241022", icon="Anthropic", tool_calling=True, deprecated=True
+         provider="Anthropic", name="claude-3-5-sonnet-20240620", icon="Anthropic", tool_calling=True, deprecated=True
      ),
      create_model_metadata(
-         provider="Anthropic", name="claude-3-haiku-20240307", icon="Anthropic", tool_calling=True, deprecated=True
+         provider="Anthropic", name="claude-3-5-sonnet-20241022", icon="Anthropic", tool_calling=True, deprecated=True
      ),
  ]

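With the older model aliases now flagged, consumers can split the list on the deprecated flag. A sketch, assuming the entries returned by create_model_metadata are dict-like (their exact type is not shown in this diff):

```python
from lfx.base.models.anthropic_constants import ANTHROPIC_MODELS_DETAILED

# Assumption: entries support .get(), as plain dicts would.
active = [m for m in ANTHROPIC_MODELS_DETAILED if not m.get("deprecated")]
tool_capable = [m["name"] for m in active if m.get("tool_calling")]
# Expected to include the new claude-opus-4-5-20251101 and claude-haiku-4-5-20251001.
```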
lfx/cli/commands.py CHANGED
@@ -304,12 +304,14 @@ async def serve_command(

      # Start the server
      try:
-         uvicorn.run(
+         config = uvicorn.Config(
              serve_app,
              host=host,
              port=port,
              log_level=log_level,
          )
+         server = uvicorn.Server(config)
+         await server.serve()
      except KeyboardInterrupt:
          verbose_print("\n👋 Server stopped")
          raise typer.Exit(0) from None
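The motivation: serve_command is async, and uvicorn.run() internally calls asyncio.run(), which raises RuntimeError when a loop is already running; uvicorn.Config plus Server.serve() awaits the server on the current loop instead. A self-contained sketch with a trivial ASGI app standing in for serve_app:

```python
import asyncio

import uvicorn


async def app(scope, receive, send):
    """Minimal ASGI app used as a stand-in for serve_app."""
    if scope["type"] == "http":
        await send({"type": "http.response.start", "status": 200, "headers": []})
        await send({"type": "http.response.body", "body": b"ok"})


async def main() -> None:
    config = uvicorn.Config(app, host="127.0.0.1", port=8000, log_level="info")
    server = uvicorn.Server(config)
    await server.serve()  # runs on the already-active event loop


if __name__ == "__main__":
    asyncio.run(main())
```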
lfx/components/docling/chunk_docling_document.py CHANGED
@@ -115,7 +115,9 @@ class ChunkDoclingDocumentComponent(Component):
          return [Data(text=doc.page_content, data=doc.metadata) for doc in docs]

      def chunk_documents(self) -> DataFrame:
-         documents = extract_docling_documents(self.data_inputs, self.doc_key)
+         documents, warning = extract_docling_documents(self.data_inputs, self.doc_key)
+         if warning:
+             self.status = warning

          chunker: BaseChunker
          if self.chunker == "HybridChunker":
lfx/components/docling/export_docling_document.py CHANGED
@@ -86,7 +86,9 @@ class ExportDoclingDocumentComponent(Component):
          return build_config

      def export_document(self) -> list[Data]:
-         documents = extract_docling_documents(self.data_inputs, self.doc_key)
+         documents, warning = extract_docling_documents(self.data_inputs, self.doc_key)
+         if warning:
+             self.status = warning

          results: list[Data] = []
          try:
lfx/components/files_and_knowledge/file.py CHANGED
@@ -21,7 +21,7 @@ from tempfile import NamedTemporaryFile
  from typing import Any

  from lfx.base.data.base_file import BaseFileComponent
- from lfx.base.data.storage_utils import parse_storage_path
+ from lfx.base.data.storage_utils import parse_storage_path, validate_image_content_type
  from lfx.base.data.utils import TEXT_FILE_TYPES, parallel_load_data, parse_text_file_to_data
  from lfx.inputs.inputs import DropdownInput, MessageTextInput, StrInput
  from lfx.io import BoolInput, FileInput, IntInput, Output
@@ -748,6 +748,27 @@ class FileComponent(BaseFileComponent):
              msg = "No files to process."
              raise ValueError(msg)

+         # Validate image files to detect content/extension mismatches
+         # This prevents API errors like "Image does not match the provided media type"
+         image_extensions = {"jpeg", "jpg", "png", "gif", "webp", "bmp", "tiff"}
+         for file in file_list:
+             extension = file.path.suffix[1:].lower()
+             if extension in image_extensions:
+                 # file.path is already resolved, read bytes directly
+                 try:
+                     content = file.path.read_bytes()
+                     is_valid, error_msg = validate_image_content_type(
+                         str(file.path),
+                         content=content,
+                     )
+                     if not is_valid:
+                         self.log(error_msg)
+                         if not self.silent_errors:
+                             raise ValueError(error_msg)
+                 except OSError as e:
+                     self.log(f"Could not read file for validation: {e}")
+                     # Continue - let it fail later with better error
+
          # Validate that files requiring Docling are only processed when advanced mode is enabled
          if not self.advanced_mode:
              for file in file_list:
@@ -786,7 +807,8 @@ class FileComponent(BaseFileComponent):
                  # --- UNNEST: expand each element in `doc` to its own Data row
                  payload = getattr(advanced_data, "data", {}) or {}
                  doc_rows = payload.get("doc")
-                 if isinstance(doc_rows, list):
+                 if isinstance(doc_rows, list) and doc_rows:
+                     # Non-empty list of structured rows
                      rows: list[Data | None] = [
                          Data(
                              data={
@@ -797,6 +819,19 @@ class FileComponent(BaseFileComponent):
                          for item in doc_rows
                      ]
                      final_return.extend(self.rollup_data(file_list, rows))
+                 elif isinstance(doc_rows, list) and not doc_rows:
+                     # Empty list - file was processed but no text content found
+                     # Create a Data object indicating no content was extracted
+                     self.log(f"No text extracted from '{file_path}', creating placeholder data")
+                     empty_data = Data(
+                         data={
+                             "file_path": file_path,
+                             "text": "(No text content extracted from image)",
+                             "info": "Image processed successfully but contained no extractable text",
+                             **{k: v for k, v in payload.items() if k != "doc"},
+                         },
+                     )
+                     final_return.extend(self.rollup_data([file], [empty_data]))
                  else:
                      # If not structured, keep as-is (e.g., markdown export or error dict)
                      final_return.extend(self.rollup_data(file_list, [advanced_data]))
@@ -820,13 +855,17 @@ class FileComponent(BaseFileComponent):
      def load_files_helper(self) -> DataFrame:
          result = self.load_files()

-         # Error condition - raise error if no text and an error is present
-         if not hasattr(result, "text"):
-             if hasattr(result, "error"):
-                 raise ValueError(result.error[0])
+         # Result is a DataFrame - check if it has any rows
+         if result.empty:
              msg = "Could not extract content from the provided file(s)."
              raise ValueError(msg)

+         # Check for error column with error messages
+         if "error" in result.columns:
+             errors = result["error"].dropna().tolist()
+             if errors and not any(col in result.columns for col in ["text", "doc", "exported_content"]):
+                 raise ValueError(errors[0])
+
          return result

      def load_files_dataframe(self) -> DataFrame:
@@ -838,4 +877,17 @@ class FileComponent(BaseFileComponent):
          """Load files using advanced Docling processing and export to Markdown format."""
          self.markdown = True
          result = self.load_files_helper()
-         return Message(text=str(result.text[0]))
+
+         # Result is a DataFrame - check for text or exported_content columns
+         if "text" in result.columns and not result["text"].isna().all():
+             text_values = result["text"].dropna().tolist()
+             if text_values:
+                 return Message(text=str(text_values[0]))
+
+         if "exported_content" in result.columns and not result["exported_content"].isna().all():
+             content_values = result["exported_content"].dropna().tolist()
+             if content_values:
+                 return Message(text=str(content_values[0]))
+
+         # Return empty message with info that no text was found
+         return Message(text="(No text content extracted from file)")
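The Message fallback above works purely on DataFrame columns. A sketch of the same extraction order on a hand-built frame (plain pandas here; the component operates on lfx's own DataFrame class):

```python
import pandas as pd


def first_text(result: pd.DataFrame) -> str:
    """Mirror the fallback order above: 'text' first, then 'exported_content'."""
    for col in ("text", "exported_content"):
        if col in result.columns and not result[col].isna().all():
            values = result[col].dropna().tolist()
            if values:
                return str(values[0])
    return "(No text content extracted from file)"


df = pd.DataFrame({"text": [None], "exported_content": ["# Title\n\nBody"]})
assert first_text(df) == "# Title\n\nBody"
```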