lfx-nightly 0.2.0.dev0__py3-none-any.whl → 0.2.0.dev41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. lfx/_assets/component_index.json +1 -1
  2. lfx/base/agents/agent.py +21 -4
  3. lfx/base/agents/altk_base_agent.py +393 -0
  4. lfx/base/agents/altk_tool_wrappers.py +565 -0
  5. lfx/base/agents/events.py +2 -1
  6. lfx/base/composio/composio_base.py +159 -224
  7. lfx/base/data/base_file.py +97 -20
  8. lfx/base/data/docling_utils.py +61 -10
  9. lfx/base/data/storage_utils.py +301 -0
  10. lfx/base/data/utils.py +178 -14
  11. lfx/base/mcp/util.py +2 -2
  12. lfx/base/models/anthropic_constants.py +21 -12
  13. lfx/base/models/groq_constants.py +74 -58
  14. lfx/base/models/groq_model_discovery.py +265 -0
  15. lfx/base/models/model.py +1 -1
  16. lfx/base/models/model_utils.py +100 -0
  17. lfx/base/models/openai_constants.py +7 -0
  18. lfx/base/models/watsonx_constants.py +32 -8
  19. lfx/base/tools/run_flow.py +601 -129
  20. lfx/cli/commands.py +9 -4
  21. lfx/cli/common.py +2 -2
  22. lfx/cli/run.py +1 -1
  23. lfx/cli/script_loader.py +53 -11
  24. lfx/components/Notion/create_page.py +1 -1
  25. lfx/components/Notion/list_database_properties.py +1 -1
  26. lfx/components/Notion/list_pages.py +1 -1
  27. lfx/components/Notion/list_users.py +1 -1
  28. lfx/components/Notion/page_content_viewer.py +1 -1
  29. lfx/components/Notion/search.py +1 -1
  30. lfx/components/Notion/update_page_property.py +1 -1
  31. lfx/components/__init__.py +19 -5
  32. lfx/components/{agents → altk}/__init__.py +5 -9
  33. lfx/components/altk/altk_agent.py +193 -0
  34. lfx/components/apify/apify_actor.py +1 -1
  35. lfx/components/composio/__init__.py +70 -18
  36. lfx/components/composio/apollo_composio.py +11 -0
  37. lfx/components/composio/bitbucket_composio.py +11 -0
  38. lfx/components/composio/canva_composio.py +11 -0
  39. lfx/components/composio/coda_composio.py +11 -0
  40. lfx/components/composio/composio_api.py +10 -0
  41. lfx/components/composio/discord_composio.py +1 -1
  42. lfx/components/composio/elevenlabs_composio.py +11 -0
  43. lfx/components/composio/exa_composio.py +11 -0
  44. lfx/components/composio/firecrawl_composio.py +11 -0
  45. lfx/components/composio/fireflies_composio.py +11 -0
  46. lfx/components/composio/gmail_composio.py +1 -1
  47. lfx/components/composio/googlebigquery_composio.py +11 -0
  48. lfx/components/composio/googlecalendar_composio.py +1 -1
  49. lfx/components/composio/googledocs_composio.py +1 -1
  50. lfx/components/composio/googlemeet_composio.py +1 -1
  51. lfx/components/composio/googlesheets_composio.py +1 -1
  52. lfx/components/composio/googletasks_composio.py +1 -1
  53. lfx/components/composio/heygen_composio.py +11 -0
  54. lfx/components/composio/mem0_composio.py +11 -0
  55. lfx/components/composio/peopledatalabs_composio.py +11 -0
  56. lfx/components/composio/perplexityai_composio.py +11 -0
  57. lfx/components/composio/serpapi_composio.py +11 -0
  58. lfx/components/composio/slack_composio.py +3 -574
  59. lfx/components/composio/slackbot_composio.py +1 -1
  60. lfx/components/composio/snowflake_composio.py +11 -0
  61. lfx/components/composio/tavily_composio.py +11 -0
  62. lfx/components/composio/youtube_composio.py +2 -2
  63. lfx/components/cuga/__init__.py +34 -0
  64. lfx/components/cuga/cuga_agent.py +730 -0
  65. lfx/components/data/__init__.py +78 -28
  66. lfx/components/data_source/__init__.py +58 -0
  67. lfx/components/{data → data_source}/api_request.py +26 -3
  68. lfx/components/{data → data_source}/csv_to_data.py +15 -10
  69. lfx/components/{data → data_source}/json_to_data.py +15 -8
  70. lfx/components/{data → data_source}/news_search.py +1 -1
  71. lfx/components/{data → data_source}/rss.py +1 -1
  72. lfx/components/{data → data_source}/sql_executor.py +1 -1
  73. lfx/components/{data → data_source}/url.py +1 -1
  74. lfx/components/{data → data_source}/web_search.py +1 -1
  75. lfx/components/datastax/astradb_cql.py +1 -1
  76. lfx/components/datastax/astradb_graph.py +1 -1
  77. lfx/components/datastax/astradb_tool.py +1 -1
  78. lfx/components/datastax/astradb_vectorstore.py +1 -1
  79. lfx/components/datastax/hcd.py +1 -1
  80. lfx/components/deactivated/json_document_builder.py +1 -1
  81. lfx/components/docling/__init__.py +0 -3
  82. lfx/components/docling/chunk_docling_document.py +3 -1
  83. lfx/components/docling/export_docling_document.py +3 -1
  84. lfx/components/elastic/elasticsearch.py +1 -1
  85. lfx/components/files_and_knowledge/__init__.py +47 -0
  86. lfx/components/{data → files_and_knowledge}/directory.py +1 -1
  87. lfx/components/{data → files_and_knowledge}/file.py +304 -24
  88. lfx/components/{knowledge_bases → files_and_knowledge}/retrieval.py +2 -2
  89. lfx/components/{data → files_and_knowledge}/save_file.py +218 -31
  90. lfx/components/flow_controls/__init__.py +58 -0
  91. lfx/components/{logic → flow_controls}/conditional_router.py +1 -1
  92. lfx/components/{logic → flow_controls}/loop.py +43 -9
  93. lfx/components/flow_controls/run_flow.py +108 -0
  94. lfx/components/glean/glean_search_api.py +1 -1
  95. lfx/components/groq/groq.py +35 -28
  96. lfx/components/helpers/__init__.py +102 -0
  97. lfx/components/ibm/watsonx.py +7 -1
  98. lfx/components/input_output/__init__.py +3 -1
  99. lfx/components/input_output/chat.py +4 -3
  100. lfx/components/input_output/chat_output.py +10 -4
  101. lfx/components/input_output/text.py +1 -1
  102. lfx/components/input_output/text_output.py +1 -1
  103. lfx/components/{data → input_output}/webhook.py +1 -1
  104. lfx/components/knowledge_bases/__init__.py +59 -4
  105. lfx/components/langchain_utilities/character.py +1 -1
  106. lfx/components/langchain_utilities/csv_agent.py +84 -16
  107. lfx/components/langchain_utilities/json_agent.py +67 -12
  108. lfx/components/langchain_utilities/language_recursive.py +1 -1
  109. lfx/components/llm_operations/__init__.py +46 -0
  110. lfx/components/{processing → llm_operations}/batch_run.py +17 -8
  111. lfx/components/{processing → llm_operations}/lambda_filter.py +1 -1
  112. lfx/components/{logic → llm_operations}/llm_conditional_router.py +1 -1
  113. lfx/components/{processing/llm_router.py → llm_operations/llm_selector.py} +3 -3
  114. lfx/components/{processing → llm_operations}/structured_output.py +1 -1
  115. lfx/components/logic/__init__.py +126 -0
  116. lfx/components/mem0/mem0_chat_memory.py +11 -0
  117. lfx/components/models/__init__.py +64 -9
  118. lfx/components/models_and_agents/__init__.py +49 -0
  119. lfx/components/{agents → models_and_agents}/agent.py +6 -4
  120. lfx/components/models_and_agents/embedding_model.py +353 -0
  121. lfx/components/models_and_agents/language_model.py +398 -0
  122. lfx/components/{agents → models_and_agents}/mcp_component.py +53 -44
  123. lfx/components/{helpers → models_and_agents}/memory.py +1 -1
  124. lfx/components/nvidia/system_assist.py +1 -1
  125. lfx/components/olivya/olivya.py +1 -1
  126. lfx/components/ollama/ollama.py +24 -5
  127. lfx/components/processing/__init__.py +9 -60
  128. lfx/components/processing/converter.py +1 -1
  129. lfx/components/processing/dataframe_operations.py +1 -1
  130. lfx/components/processing/parse_json_data.py +2 -2
  131. lfx/components/processing/parser.py +1 -1
  132. lfx/components/processing/split_text.py +1 -1
  133. lfx/components/qdrant/qdrant.py +1 -1
  134. lfx/components/redis/redis.py +1 -1
  135. lfx/components/twelvelabs/split_video.py +10 -0
  136. lfx/components/twelvelabs/video_file.py +12 -0
  137. lfx/components/utilities/__init__.py +43 -0
  138. lfx/components/{helpers → utilities}/calculator_core.py +1 -1
  139. lfx/components/{helpers → utilities}/current_date.py +1 -1
  140. lfx/components/{processing → utilities}/python_repl_core.py +1 -1
  141. lfx/components/vectorstores/local_db.py +9 -0
  142. lfx/components/youtube/youtube_transcripts.py +118 -30
  143. lfx/custom/custom_component/component.py +57 -1
  144. lfx/custom/custom_component/custom_component.py +68 -6
  145. lfx/custom/directory_reader/directory_reader.py +5 -2
  146. lfx/graph/edge/base.py +43 -20
  147. lfx/graph/state/model.py +15 -2
  148. lfx/graph/utils.py +6 -0
  149. lfx/graph/vertex/param_handler.py +10 -7
  150. lfx/helpers/__init__.py +12 -0
  151. lfx/helpers/flow.py +117 -0
  152. lfx/inputs/input_mixin.py +24 -1
  153. lfx/inputs/inputs.py +13 -1
  154. lfx/interface/components.py +161 -83
  155. lfx/log/logger.py +5 -3
  156. lfx/schema/image.py +2 -12
  157. lfx/services/database/__init__.py +5 -0
  158. lfx/services/database/service.py +25 -0
  159. lfx/services/deps.py +87 -22
  160. lfx/services/interfaces.py +5 -0
  161. lfx/services/manager.py +24 -10
  162. lfx/services/mcp_composer/service.py +1029 -162
  163. lfx/services/session.py +5 -0
  164. lfx/services/settings/auth.py +18 -11
  165. lfx/services/settings/base.py +56 -30
  166. lfx/services/settings/constants.py +8 -0
  167. lfx/services/storage/local.py +108 -46
  168. lfx/services/storage/service.py +171 -29
  169. lfx/template/field/base.py +3 -0
  170. lfx/utils/image.py +29 -11
  171. lfx/utils/ssrf_protection.py +384 -0
  172. lfx/utils/validate_cloud.py +26 -0
  173. {lfx_nightly-0.2.0.dev0.dist-info → lfx_nightly-0.2.0.dev41.dist-info}/METADATA +38 -22
  174. {lfx_nightly-0.2.0.dev0.dist-info → lfx_nightly-0.2.0.dev41.dist-info}/RECORD +189 -160
  175. {lfx_nightly-0.2.0.dev0.dist-info → lfx_nightly-0.2.0.dev41.dist-info}/WHEEL +1 -1
  176. lfx/components/agents/altk_agent.py +0 -366
  177. lfx/components/agents/cuga_agent.py +0 -1013
  178. lfx/components/docling/docling_remote_vlm.py +0 -284
  179. lfx/components/logic/run_flow.py +0 -71
  180. lfx/components/models/embedding_model.py +0 -195
  181. lfx/components/models/language_model.py +0 -144
  182. lfx/components/processing/dataframe_to_toolset.py +0 -259
  183. /lfx/components/{data → data_source}/mock_data.py +0 -0
  184. /lfx/components/{knowledge_bases → files_and_knowledge}/ingestion.py +0 -0
  185. /lfx/components/{logic → flow_controls}/data_conditional_router.py +0 -0
  186. /lfx/components/{logic → flow_controls}/flow_tool.py +0 -0
  187. /lfx/components/{logic → flow_controls}/listen.py +0 -0
  188. /lfx/components/{logic → flow_controls}/notify.py +0 -0
  189. /lfx/components/{logic → flow_controls}/pass_message.py +0 -0
  190. /lfx/components/{logic → flow_controls}/sub_flow.py +0 -0
  191. /lfx/components/{processing → models_and_agents}/prompt.py +0 -0
  192. /lfx/components/{helpers → processing}/create_list.py +0 -0
  193. /lfx/components/{helpers → processing}/output_parser.py +0 -0
  194. /lfx/components/{helpers → processing}/store_message.py +0 -0
  195. /lfx/components/{helpers → utilities}/id_generator.py +0 -0
  196. {lfx_nightly-0.2.0.dev0.dist-info → lfx_nightly-0.2.0.dev41.dist-info}/entry_points.txt +0 -0
lfx/base/data/base_file.py

@@ -2,6 +2,7 @@ import ast
 import shutil
 import tarfile
 from abc import ABC, abstractmethod
+from io import BytesIO
 from pathlib import Path
 from tempfile import TemporaryDirectory
 from typing import TYPE_CHECKING, Any
@@ -10,11 +11,14 @@ from zipfile import ZipFile, is_zipfile
 import orjson
 import pandas as pd
 
+from lfx.base.data.storage_utils import get_file_size, read_file_bytes
 from lfx.custom.custom_component.component import Component
 from lfx.io import BoolInput, FileInput, HandleInput, Output, StrInput
 from lfx.schema.data import Data
 from lfx.schema.dataframe import DataFrame
 from lfx.schema.message import Message
+from lfx.services.deps import get_settings_service
+from lfx.utils.async_helpers import run_until_complete
 from lfx.utils.helpers import build_content_type_from_extension
 
 if TYPE_CHECKING:
@@ -27,6 +31,8 @@ class BaseFileComponent(Component, ABC):
     This class provides common functionality for resolving, validating, and
     processing file paths. Child classes must define valid file extensions
     and implement the `process_files` method.
+
+    # TODO: May want to subclass for local and remote files
     """
 
     class BaseFile:
@@ -251,12 +257,25 @@ class BaseFileComponent(Component, ABC):
 
         file_path = data_item.file_path
         file_path_obj = Path(file_path)
-        file_size_stat = file_path_obj.stat()
         filename = file_path_obj.name
 
+        settings = get_settings_service().settings
+        if settings.storage_type == "s3":
+            try:
+                file_size = get_file_size(file_path)
+            except (FileNotFoundError, ValueError):
+                # If we can't get file size, set to 0 or omit
+                file_size = 0
+        else:
+            try:
+                file_size_stat = file_path_obj.stat()
+                file_size = file_size_stat.st_size
+            except OSError:
+                file_size = 0
+
         # Basic file metadata
         metadata["filename"] = filename
-        metadata["file_size"] = file_size_stat.st_size
+        metadata["file_size"] = file_size
 
         # Add MIME type from extension
         extension = filename.split(".")[-1]
@@ -321,7 +340,16 @@ class BaseFileComponent(Component, ABC):
             Message: Message containing file paths
         """
         files = self._validate_and_resolve_paths()
-        paths = [file.path.as_posix() for file in files if file.path.exists()]
+        settings = get_settings_service().settings
+
+        # For S3 storage, paths are virtual storage keys that don't exist on the local filesystem.
+        # Skip the exists() check for S3 files to preserve them in the output.
+        # Validation of S3 file existence is deferred until file processing (see _validate_and_resolve_paths).
+        # If a file was removed from S3, it will fail when attempting to read/process it later.
+        if settings.storage_type == "s3":
+            paths = [file.path.as_posix() for file in files]
+        else:
+            paths = [file.path.as_posix() for file in files if file.path.exists()]
 
         return Message(text="\n".join(paths) if paths else "")
 
@@ -329,7 +357,29 @@ class BaseFileComponent(Component, ABC):
         if not file_path:
             return None
 
-        # Map file extensions to pandas read functions with type annotation
+        # Get file extension in lowercase
+        ext = Path(file_path).suffix.lower()
+
+        settings = get_settings_service().settings
+
+        # For S3 storage, download file bytes first
+        if settings.storage_type == "s3":
+            # Download file content from S3
+            content = run_until_complete(read_file_bytes(file_path))
+
+            # Map file extensions to pandas read functions that support BytesIO
+            if ext == ".csv":
+                result = pd.read_csv(BytesIO(content))
+            elif ext == ".xlsx":
+                result = pd.read_excel(BytesIO(content))
+            elif ext == ".parquet":
+                result = pd.read_parquet(BytesIO(content))
+            else:
+                return None
+
+            return result.to_dict("records")
+
+        # Local storage - read directly from filesystem
         file_readers: dict[str, Callable[[str], pd.DataFrame]] = {
             ".csv": pd.read_csv,
             ".xlsx": pd.read_excel,
@@ -337,9 +387,6 @@ class BaseFileComponent(Component, ABC):
             # TODO: sqlite and json support?
        }
 
-        # Get file extension in lowercase
-        ext = Path(file_path).suffix.lower()
-
         # Get the appropriate reader function or None
         reader = file_readers.get(ext)
 
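The S3 branch above never touches the local filesystem: `read_file_bytes` returns raw bytes, and the pandas readers accept any file-like object, which is why the download is wrapped in `BytesIO`. A minimal sketch of that pattern in isolation (the inline CSV bytes stand in for an S3 download and are purely illustrative):

```python
from io import BytesIO

import pandas as pd

# Stand-in for bytes fetched from object storage, e.g.:
#   content = run_until_complete(read_file_bytes("flow_id/data.csv"))
content = b"name,score\nalice,1\nbob,2\n"

# pandas readers accept file-like objects, so no temporary file is needed
result = pd.read_csv(BytesIO(content))
print(result.to_dict("records"))
# [{'name': 'alice', 'score': 1}, {'name': 'bob', 'score': 2}]
```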
@@ -558,16 +605,38 @@ class BaseFileComponent(Component, ABC):
         resolved_files = []
 
         def add_file(data: Data, path: str | Path, *, delete_after_processing: bool):
-            resolved_path = Path(self.resolve_path(str(path)))
+            path_str = str(path)
+            settings = get_settings_service().settings
+
+            # When using object storage (S3), file paths are storage keys (e.g., "<flow_id>/<filename>")
+            # that don't exist on the local filesystem. We defer validation until file processing.
+            # For local storage, validate the file exists immediately to fail fast.
+            if settings.storage_type == "s3":
+                resolved_files.append(
+                    BaseFileComponent.BaseFile(data, Path(path_str), delete_after_processing=delete_after_processing)
+                )
+            else:
+                # Check if path looks like a storage path (flow_id/filename format)
+                # If so, use get_full_path to resolve it to the actual storage location
+                if "/" in path_str and not Path(path_str).is_absolute():
+                    try:
+                        resolved_path = Path(self.get_full_path(path_str))
+                        self.log(f"Resolved storage path '{path_str}' to '{resolved_path}'")
+                    except (ValueError, AttributeError) as e:
+                        # Fallback to resolve_path if get_full_path fails
+                        self.log(f"get_full_path failed for '{path_str}': {e}, falling back to resolve_path")
+                        resolved_path = Path(self.resolve_path(path_str))
+                else:
+                    resolved_path = Path(self.resolve_path(path_str))
 
-            if not resolved_path.exists():
-                msg = f"File or directory not found: {path}"
-                self.log(msg)
-                if not self.silent_errors:
-                    raise ValueError(msg)
-            resolved_files.append(
-                BaseFileComponent.BaseFile(data, resolved_path, delete_after_processing=delete_after_processing)
-            )
+                if not resolved_path.exists():
+                    msg = f"File not found: '{path}' (resolved to: '{resolved_path}'). Please upload the file again."
+                    self.log(msg)
+                    if not self.silent_errors:
+                        raise ValueError(msg)
+                resolved_files.append(
+                    BaseFileComponent.BaseFile(data, resolved_path, delete_after_processing=delete_after_processing)
+                )
 
         file_path = self._file_path_as_list()
 
@@ -707,7 +776,7 @@ class BaseFileComponent(Component, ABC):
             raise ValueError(msg)
 
     def _filter_and_mark_files(self, files: list[BaseFile]) -> list[BaseFile]:
-        """Validate file types and mark files for removal.
+        """Validate file types and filter out invalid files.
 
         Args:
             files (list[BaseFile]): List of BaseFile instances.
@@ -718,18 +787,26 @@ class BaseFileComponent(Component, ABC):
         Raises:
             ValueError: If unsupported files are encountered and `ignore_unsupported_extensions` is False.
         """
+        settings = get_settings_service().settings
+        is_s3_storage = settings.storage_type == "s3"
         final_files = []
         ignored_files = []
 
         for file in files:
-            if not file.path.is_file():
+            # For local storage, verify the path is actually a file
+            # For S3 storage, paths are virtual keys that don't exist locally
+            if not is_s3_storage and not file.path.is_file():
                 self.log(f"Not a file: {file.path.name}")
                 continue
 
-            if file.path.suffix[1:].lower() not in self.valid_extensions:
-                if self.ignore_unsupported_extensions:
+            # Validate file extension
+            extension = file.path.suffix[1:].lower() if file.path.suffix else ""
+            if extension not in self.valid_extensions:
+                # For local storage, optionally ignore unsupported extensions
+                if not is_s3_storage and self.ignore_unsupported_extensions:
                     ignored_files.append(file.path.name)
                     continue
+
                 msg = f"Unsupported file extension: {file.path.suffix}"
                 self.log(msg)
                 if not self.silent_errors:
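The local-storage branch of `add_file` decides between `get_full_path` and `resolve_path` with the `"/" in path_str and not Path(path_str).is_absolute()` check. How those two pathlib predicates classify the path shapes involved (POSIX semantics; file names are illustrative):

```python
from pathlib import Path

def looks_like_storage_key(path_str: str) -> bool:
    # Same heuristic as add_file: a relative path containing a separator
    return "/" in path_str and not Path(path_str).is_absolute()

print(looks_like_storage_key("a1b2c3/report.csv"))  # True  -> try get_full_path first
print(looks_like_storage_key("/tmp/report.csv"))    # False -> absolute, use resolve_path
print(looks_like_storage_key("report.csv"))         # False -> bare name, use resolve_path
```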
lfx/base/data/docling_utils.py

@@ -25,21 +25,72 @@ class DoclingDependencyError(Exception):
         super().__init__(f"{dependency_name} is not correctly installed. {install_command}")
 
 
-def extract_docling_documents(data_inputs: Data | list[Data] | DataFrame, doc_key: str) -> list[DoclingDocument]:
+def extract_docling_documents(
+    data_inputs: Data | list[Data] | DataFrame, doc_key: str
+) -> tuple[list[DoclingDocument], str | None]:
+    """Extract DoclingDocument objects from data inputs.
+
+    Args:
+        data_inputs: The data inputs containing DoclingDocument objects
+        doc_key: The key/column name to look for DoclingDocument objects
+
+    Returns:
+        A tuple of (documents, warning_message) where warning_message is None if no warning
+
+    Raises:
+        TypeError: If the data cannot be extracted or is invalid
+    """
     documents: list[DoclingDocument] = []
+    warning_message: str | None = None
+
     if isinstance(data_inputs, DataFrame):
         if not len(data_inputs):
             msg = "DataFrame is empty"
             raise TypeError(msg)
 
-        if doc_key not in data_inputs.columns:
-            msg = f"Column '{doc_key}' not found in DataFrame"
-            raise TypeError(msg)
-        try:
-            documents = data_inputs[doc_key].tolist()
-        except Exception as e:
-            msg = f"Error extracting DoclingDocument from DataFrame: {e}"
-            raise TypeError(msg) from e
+        # Primary: Check for exact column name match
+        if doc_key in data_inputs.columns:
+            try:
+                documents = data_inputs[doc_key].tolist()
+            except Exception as e:
+                msg = f"Error extracting DoclingDocument from DataFrame column '{doc_key}': {e}"
+                raise TypeError(msg) from e
+        else:
+            # Fallback: Search all columns for DoclingDocument objects
+            found_column = None
+            for col in data_inputs.columns:
+                try:
+                    # Check if this column contains DoclingDocument objects
+                    sample = data_inputs[col].dropna().iloc[0] if len(data_inputs[col].dropna()) > 0 else None
+                    if sample is not None and isinstance(sample, DoclingDocument):
+                        found_column = col
+                        break
+                except (IndexError, AttributeError):
+                    continue
+
+            if found_column:
+                warning_message = (
+                    f"Column '{doc_key}' not found, but found DoclingDocument objects in column '{found_column}'. "
+                    f"Using '{found_column}' instead. Consider updating the 'Doc Key' parameter."
+                )
+                logger.warning(warning_message)
+                try:
+                    documents = data_inputs[found_column].tolist()
+                except Exception as e:
+                    msg = f"Error extracting DoclingDocument from DataFrame column '{found_column}': {e}"
+                    raise TypeError(msg) from e
+            else:
+                # Provide helpful error message
+                available_columns = list(data_inputs.columns)
+                msg = (
+                    f"Column '{doc_key}' not found in DataFrame. "
+                    f"Available columns: {available_columns}. "
+                    f"\n\nPossible solutions:\n"
+                    f"1. Use the 'Data' output from Docling component instead of 'DataFrame' output\n"
+                    f"2. Update the 'Doc Key' parameter to match one of the available columns\n"
+                    f"3. If using VLM pipeline, try using the standard pipeline"
+                )
+                raise TypeError(msg)
     else:
         if not data_inputs:
             msg = "No data inputs provided"
@@ -69,7 +120,7 @@ def extract_docling_documents(data_inputs: Data | list[Data] | DataFrame, doc_key: str) -> list[DoclingDocument]:
         except AttributeError as e:
             msg = f"Invalid input type in collection: {e}"
             raise TypeError(msg) from e
-    return documents
+    return documents, warning_message
 
 
 def _unwrap_secrets(obj):
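Because `extract_docling_documents` now returns a `(documents, warning_message)` tuple rather than a bare list, every call site has to unpack it. A hedged sketch of an adapted caller (how the warning is surfaced is an assumption; the diff only defines the new return shape):

```python
from lfx.base.data.docling_utils import extract_docling_documents

def get_documents(data_inputs, doc_key: str):
    # Old-style `documents = extract_docling_documents(...)` would now bind a tuple.
    documents, warning = extract_docling_documents(data_inputs, doc_key)
    if warning:
        # e.g., forward the column-fallback warning to component status or logs
        print(f"docling: {warning}")
    return documents
```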
lfx/base/data/storage_utils.py (new file)

@@ -0,0 +1,301 @@
+"""Storage-aware file utilities for components.
+
+This module provides utilities that work with both local files and remote files
+stored in the storage service.
+
+TODO: Can abstract these into the storage service interface and update
+implementations.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from lfx.services.deps import get_settings_service, get_storage_service
+from lfx.utils.async_helpers import run_until_complete
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+    from lfx.services.storage.service import StorageService
+
+# Constants for path parsing
+EXPECTED_PATH_PARTS = 2  # Path format: "flow_id/filename"
+
+
+def parse_storage_path(path: str) -> tuple[str, str] | None:
+    """Parse a storage service path into flow_id and filename.
+
+    Storage service paths follow the format: flow_id/filename
+    This should only be called when storage_type == "s3".
+
+    Args:
+        path: The storage service path in format "flow_id/filename"
+
+    Returns:
+        tuple[str, str] | None: (flow_id, filename) or None if invalid format
+    """
+    if not path or "/" not in path:
+        return None
+
+    parts = path.split("/", 1)
+    if len(parts) != EXPECTED_PATH_PARTS or not parts[0] or not parts[1]:
+        return None
+
+    return parts[0], parts[1]
+
+
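Because the key is split with `split("/", 1)`, only the first separator matters: everything after it stays in the filename part. Some illustrative inputs (the IDs are made up):

```python
from lfx.base.data.storage_utils import parse_storage_path

print(parse_storage_path("a1b2c3/report.csv"))      # ('a1b2c3', 'report.csv')
print(parse_storage_path("a1b2c3/sub/report.csv"))  # ('a1b2c3', 'sub/report.csv')
print(parse_storage_path("report.csv"))             # None (no separator)
print(parse_storage_path("/report.csv"))            # None (empty flow_id)
```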
+async def read_file_bytes(
+    file_path: str,
+    storage_service: StorageService | None = None,
+    resolve_path: Callable[[str], str] | None = None,
+) -> bytes:
+    """Read file bytes from either storage service or local filesystem.
+
+    Args:
+        file_path: Path to the file (S3 key format "flow_id/filename" or local path)
+        storage_service: Optional storage service instance (will get from deps if not provided)
+        resolve_path: Optional function to resolve relative paths to absolute paths
+            (typically Component.resolve_path). Only used for local storage.
+
+    Returns:
+        bytes: The file content
+
+    Raises:
+        FileNotFoundError: If the file doesn't exist
+    """
+    settings = get_settings_service().settings
+
+    if settings.storage_type == "s3":
+        parsed = parse_storage_path(file_path)
+        if not parsed:
+            msg = f"Invalid S3 path format: {file_path}. Expected 'flow_id/filename'"
+            raise ValueError(msg)
+
+        if storage_service is None:
+            storage_service = get_storage_service()
+
+        flow_id, filename = parsed
+        return await storage_service.get_file(flow_id, filename)
+
+    # For local storage, resolve path if resolver provided
+    if resolve_path:
+        file_path = resolve_path(file_path)
+
+    path_obj = Path(file_path)
+    if not path_obj.exists():
+        msg = f"File not found: {file_path}"
+        raise FileNotFoundError(msg)
+
+    return path_obj.read_bytes()
+
+
+async def read_file_text(
+    file_path: str,
+    encoding: str = "utf-8",
+    storage_service: StorageService | None = None,
+    resolve_path: Callable[[str], str] | None = None,
+    newline: str | None = None,
+) -> str:
+    r"""Read file text from either storage service or local filesystem.
+
+    Args:
+        file_path: Path to the file (storage service path or local path)
+        encoding: Text encoding to use
+        storage_service: Optional storage service instance
+        resolve_path: Optional function to resolve relative paths to absolute paths
+            (typically Component.resolve_path). Only used for local storage.
+        newline: Newline mode (None for default, "" for universal newlines like CSV).
+            When set to "", normalizes all line endings to \\n for consistency.
+
+    Returns:
+        str: The file content as text
+
+    Raises:
+        FileNotFoundError: If the file doesn't exist
+    """
+    settings = get_settings_service().settings
+
+    if settings.storage_type == "s3":
+        content = await read_file_bytes(file_path, storage_service, resolve_path)
+        text = content.decode(encoding)
+        # Normalize newlines for S3 when newline="" is specified (universal newline mode)
+        if newline == "":
+            # Convert all line endings to \n (matches Python's universal newline mode)
+            text = text.replace("\r\n", "\n").replace("\r", "\n")
+        return text
+    # For local storage, resolve path if resolver provided
+    if resolve_path:
+        file_path = resolve_path(file_path)
+
+    path_obj = Path(file_path)
+    if newline is not None:
+        with path_obj.open(newline=newline, encoding=encoding) as f:  # noqa: ASYNC230
+            return f.read()
+    return path_obj.read_text(encoding=encoding)
+
+
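Both readers are coroutines, so synchronous component code would go through `run_until_complete`, the same helper this module already imports. A hedged usage sketch (the storage key and a configured storage backend are assumptions):

```python
import csv
from io import StringIO

from lfx.base.data.storage_utils import read_file_text
from lfx.utils.async_helpers import run_until_complete

# Same call shape for both backends: an S3 key like "flow_id/table.csv"
# or a local path, depending on settings.storage_type.
text = run_until_complete(read_file_text("flow_id/table.csv", newline=""))

# With newline="" the line endings are normalized, which is what csv expects.
rows = list(csv.reader(StringIO(text)))
```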
+def get_file_size(file_path: str, storage_service: StorageService | None = None) -> int:
+    """Get file size from either storage service or local filesystem.
+
+    Note: This is a sync wrapper - for async code, use the storage service directly.
+
+    Args:
+        file_path: Path to the file (S3 key format "flow_id/filename" or absolute local path)
+        storage_service: Optional storage service instance
+
+    Returns:
+        int: File size in bytes
+
+    Raises:
+        FileNotFoundError: If the file doesn't exist
+    """
+    settings = get_settings_service().settings
+
+    if settings.storage_type == "s3":
+        parsed = parse_storage_path(file_path)
+        if not parsed:
+            msg = f"Invalid S3 path format: {file_path}. Expected 'flow_id/filename'"
+            raise ValueError(msg)
+
+        if storage_service is None:
+            storage_service = get_storage_service()
+
+        flow_id, filename = parsed
+        return run_until_complete(storage_service.get_file_size(flow_id, filename))
+
+    # Local file system
+    path_obj = Path(file_path)
+    if not path_obj.exists():
+        msg = f"File not found: {file_path}"
+        raise FileNotFoundError(msg)
+
+    return path_obj.stat().st_size
+
+
+def file_exists(file_path: str, storage_service: StorageService | None = None) -> bool:
+    """Check if a file exists in either storage service or local filesystem.
+
+    Args:
+        file_path: Path to the file (S3 key format "flow_id/filename" or absolute local path)
+        storage_service: Optional storage service instance
+
+    Returns:
+        bool: True if the file exists
+    """
+    try:
+        get_file_size(file_path, storage_service)
+    except (FileNotFoundError, ValueError):
+        return False
+    else:
+        return True
+
+
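`file_exists` reuses `get_file_size` as an existence probe, with `try/except/else` keeping the success return out of the exception scope. The same idiom with only the standard library, for comparison (paths are illustrative):

```python
from pathlib import Path

def local_file_exists(file_path: str) -> bool:
    # Probe the file; treat any OS-level failure as "does not exist",
    # and return True only when the probe itself succeeded.
    try:
        Path(file_path).stat()
    except OSError:
        return False
    else:
        return True

print(local_file_exists("/etc/hosts"))     # True on typical Unix systems
print(local_file_exists("/no/such/file"))  # False
```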
+# Magic bytes signatures for common image formats
+MIN_IMAGE_HEADER_SIZE = 12  # Minimum bytes needed to detect image type
+
+IMAGE_SIGNATURES: dict[str, list[tuple[bytes, int]]] = {
+    "jpeg": [(b"\xff\xd8\xff", 0)],
+    "jpg": [(b"\xff\xd8\xff", 0)],
+    "png": [(b"\x89PNG\r\n\x1a\n", 0)],
+    "gif": [(b"GIF87a", 0), (b"GIF89a", 0)],
+    "webp": [(b"RIFF", 0)],  # WebP starts with RIFF, then has WEBP at offset 8
+    "bmp": [(b"BM", 0)],
+    "tiff": [(b"II*\x00", 0), (b"MM\x00*", 0)],  # Little-endian and big-endian TIFF
+}
+
+
+def detect_image_type_from_bytes(content: bytes) -> str | None:
+    """Detect the actual image type from file content using magic bytes.
+
+    Args:
+        content: The file content bytes (at least first 12 bytes needed)
+
+    Returns:
+        str | None: The detected image type (e.g., "jpeg", "png") or None if not recognized
+    """
+    if len(content) < MIN_IMAGE_HEADER_SIZE:
+        return None
+
+    # Check WebP specifically (needs to check both RIFF and WEBP)
+    if content[:4] == b"RIFF" and content[8:12] == b"WEBP":
+        return "webp"
+
+    # Check other image signatures
+    for image_type, signatures in IMAGE_SIGNATURES.items():
+        if image_type == "webp":
+            continue  # Already handled above
+        for signature, offset in signatures:
+            if content[offset : offset + len(signature)] == signature:
+                return image_type
+
+    return None
+
+
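The signatures above are the standard magic numbers (PNG's 8-byte header, JPEG's `FF D8 FF`, and so on), so the detector can be exercised with hand-built byte strings:

```python
from lfx.base.data.storage_utils import detect_image_type_from_bytes

png_header = b"\x89PNG\r\n\x1a\n" + b"\x00\x00\x00\rIHDR"
print(detect_image_type_from_bytes(png_header))  # png

jpeg_header = b"\xff\xd8\xff\xe0" + b"\x00" * 8
print(detect_image_type_from_bytes(jpeg_header))  # jpeg

print(detect_image_type_from_bytes(b"not an image at all"))  # None
print(detect_image_type_from_bytes(b"short"))  # None (below the 12-byte minimum)
```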
+def validate_image_content_type(
+    file_path: str,
+    content: bytes | None = None,
+    storage_service: StorageService | None = None,
+    resolve_path: Callable[[str], str] | None = None,
+) -> tuple[bool, str | None]:
+    """Validate that an image file's content matches its declared extension.
+
+    This prevents errors like "Image does not match the provided media type image/png"
+    when a JPEG file is saved with a .png extension.
+
+    Only rejects files when we can definitively detect a mismatch. Files with
+    unrecognized content are allowed through (they may fail later, but that's
+    better than false positives blocking valid files).
+
+    Args:
+        file_path: Path to the image file
+        content: Optional pre-read file content bytes. If not provided, will read from file.
+        storage_service: Optional storage service instance for S3 files
+        resolve_path: Optional function to resolve relative paths
+
+    Returns:
+        tuple[bool, str | None]: (is_valid, error_message)
+            - (True, None) if the content matches the extension, is unrecognized, or file is not an image
+            - (False, error_message) if there's a definite mismatch
+    """
+    # Get the file extension
+    path_obj = Path(file_path)
+    extension = path_obj.suffix[1:].lower() if path_obj.suffix else ""
+
+    # Only validate image files
+    image_extensions = {"jpeg", "jpg", "png", "gif", "webp", "bmp", "tiff"}
+    if extension not in image_extensions:
+        return True, None
+
+    # Read content if not provided
+    if content is None:
+        try:
+            content = run_until_complete(read_file_bytes(file_path, storage_service, resolve_path))
+        except (FileNotFoundError, ValueError):
+            # Can't read file - let it pass, will fail later with better error
+            return True, None
+
+    # Detect actual image type
+    detected_type = detect_image_type_from_bytes(content)
+
+    # If we can't detect the type, the file is not a valid image
+    if detected_type is None:
+        return False, (
+            f"File '{path_obj.name}' has extension '.{extension}' but its content "
+            f"is not a valid image format. The file may be corrupted, empty, or not a real image."
+        )
+
+    # Normalize extensions for comparison (jpg == jpeg, tif == tiff)
+    extension_normalized = "jpeg" if extension == "jpg" else extension
+    detected_normalized = "jpeg" if detected_type == "jpg" else detected_type
+
+    if extension_normalized != detected_normalized:
+        return False, (
+            f"File '{path_obj.name}' has extension '.{extension}' but contains "
+            f"'{detected_type.upper()}' image data. This mismatch will cause API errors. "
+            f"Please rename the file with the correct extension '.{detected_type}' or "
+            f"re-save it in the correct format."
+        )
+
+    return True, None
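
A hedged sketch of calling the validator directly (file names and bytes are illustrative). When `content` is supplied, no storage lookup happens at all; and per the code above, bytes that don't match any known image signature under an image extension are treated as a definite failure:

```python
from lfx.base.data.storage_utils import validate_image_content_type

# JPEG bytes saved under a .png name: a definite mismatch.
jpeg_bytes = b"\xff\xd8\xff\xe0" + b"\x00" * 8
is_valid, error = validate_image_content_type("photo.png", content=jpeg_bytes)
print(is_valid)  # False
print(error)     # "File 'photo.png' has extension '.png' but contains 'JPEG' image data..."

# Non-image extensions are never validated.
print(validate_image_content_type("notes.txt", content=b"hello"))  # (True, None)
```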