lfx-nightly 0.2.0.dev26__py3-none-any.whl → 0.2.1.dev7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lfx/_assets/component_index.json +1 -1
- lfx/base/agents/agent.py +9 -4
- lfx/base/agents/altk_base_agent.py +16 -3
- lfx/base/agents/altk_tool_wrappers.py +1 -1
- lfx/base/agents/utils.py +4 -0
- lfx/base/composio/composio_base.py +78 -41
- lfx/base/data/base_file.py +14 -4
- lfx/base/data/cloud_storage_utils.py +156 -0
- lfx/base/data/docling_utils.py +191 -65
- lfx/base/data/storage_utils.py +109 -0
- lfx/base/datastax/astradb_base.py +75 -64
- lfx/base/mcp/util.py +2 -2
- lfx/base/models/__init__.py +11 -1
- lfx/base/models/anthropic_constants.py +21 -12
- lfx/base/models/google_generative_ai_constants.py +33 -9
- lfx/base/models/model_metadata.py +6 -0
- lfx/base/models/ollama_constants.py +196 -30
- lfx/base/models/openai_constants.py +37 -10
- lfx/base/models/unified_models.py +1123 -0
- lfx/base/models/watsonx_constants.py +36 -0
- lfx/base/tools/component_tool.py +2 -9
- lfx/cli/commands.py +6 -1
- lfx/cli/run.py +65 -409
- lfx/cli/script_loader.py +13 -3
- lfx/components/__init__.py +0 -3
- lfx/components/composio/github_composio.py +1 -1
- lfx/components/cuga/cuga_agent.py +39 -27
- lfx/components/data_source/api_request.py +4 -2
- lfx/components/docling/__init__.py +45 -11
- lfx/components/docling/chunk_docling_document.py +3 -1
- lfx/components/docling/docling_inline.py +39 -49
- lfx/components/docling/export_docling_document.py +3 -1
- lfx/components/elastic/opensearch_multimodal.py +215 -57
- lfx/components/files_and_knowledge/file.py +439 -39
- lfx/components/files_and_knowledge/ingestion.py +8 -0
- lfx/components/files_and_knowledge/retrieval.py +10 -0
- lfx/components/files_and_knowledge/save_file.py +123 -53
- lfx/components/ibm/watsonx.py +7 -1
- lfx/components/input_output/chat_output.py +7 -1
- lfx/components/langchain_utilities/tool_calling.py +14 -6
- lfx/components/llm_operations/batch_run.py +80 -25
- lfx/components/llm_operations/lambda_filter.py +33 -6
- lfx/components/llm_operations/llm_conditional_router.py +39 -7
- lfx/components/llm_operations/structured_output.py +38 -12
- lfx/components/models/__init__.py +16 -74
- lfx/components/models_and_agents/agent.py +51 -201
- lfx/components/models_and_agents/embedding_model.py +185 -339
- lfx/components/models_and_agents/language_model.py +54 -318
- lfx/components/models_and_agents/mcp_component.py +58 -9
- lfx/components/ollama/ollama.py +9 -4
- lfx/components/ollama/ollama_embeddings.py +2 -1
- lfx/components/openai/openai_chat_model.py +1 -1
- lfx/components/processing/__init__.py +0 -3
- lfx/components/vllm/__init__.py +37 -0
- lfx/components/vllm/vllm.py +141 -0
- lfx/components/vllm/vllm_embeddings.py +110 -0
- lfx/custom/custom_component/custom_component.py +8 -6
- lfx/custom/directory_reader/directory_reader.py +5 -2
- lfx/graph/utils.py +64 -18
- lfx/inputs/__init__.py +2 -0
- lfx/inputs/input_mixin.py +54 -0
- lfx/inputs/inputs.py +115 -0
- lfx/interface/initialize/loading.py +42 -12
- lfx/io/__init__.py +2 -0
- lfx/run/__init__.py +5 -0
- lfx/run/base.py +494 -0
- lfx/schema/data.py +1 -1
- lfx/schema/image.py +28 -19
- lfx/schema/message.py +19 -3
- lfx/services/interfaces.py +5 -0
- lfx/services/manager.py +5 -4
- lfx/services/mcp_composer/service.py +45 -13
- lfx/services/settings/auth.py +18 -11
- lfx/services/settings/base.py +12 -24
- lfx/services/settings/constants.py +2 -0
- lfx/services/storage/local.py +37 -0
- lfx/services/storage/service.py +19 -0
- lfx/utils/constants.py +1 -0
- lfx/utils/image.py +29 -11
- lfx/utils/validate_cloud.py +14 -3
- {lfx_nightly-0.2.0.dev26.dist-info → lfx_nightly-0.2.1.dev7.dist-info}/METADATA +5 -2
- {lfx_nightly-0.2.0.dev26.dist-info → lfx_nightly-0.2.1.dev7.dist-info}/RECORD +84 -78
- lfx/components/processing/dataframe_to_toolset.py +0 -259
- {lfx_nightly-0.2.0.dev26.dist-info → lfx_nightly-0.2.1.dev7.dist-info}/WHEEL +0 -0
- {lfx_nightly-0.2.0.dev26.dist-info → lfx_nightly-0.2.1.dev7.dist-info}/entry_points.txt +0 -0
lfx/base/data/docling_utils.py
CHANGED
@@ -3,7 +3,7 @@ import signal
 import sys
 import traceback
 from contextlib import suppress
-from …
+from functools import lru_cache
 
 from docling_core.types.doc import DoclingDocument
 from pydantic import BaseModel, SecretStr, TypeAdapter
@@ -12,9 +12,6 @@ from lfx.log.logger import logger
 from lfx.schema.data import Data
 from lfx.schema.dataframe import DataFrame
 
-if TYPE_CHECKING:
-    from langchain_core.language_models.chat_models import BaseChatModel
-
 
 class DoclingDependencyError(Exception):
     """Custom exception for missing Docling dependencies."""
@@ -25,21 +22,72 @@ class DoclingDependencyError(Exception):
         super().__init__(f"{dependency_name} is not correctly installed. {install_command}")
 
 
-def extract_docling_documents(…
+def extract_docling_documents(
+    data_inputs: Data | list[Data] | DataFrame, doc_key: str
+) -> tuple[list[DoclingDocument], str | None]:
+    """Extract DoclingDocument objects from data inputs.
+
+    Args:
+        data_inputs: The data inputs containing DoclingDocument objects
+        doc_key: The key/column name to look for DoclingDocument objects
+
+    Returns:
+        A tuple of (documents, warning_message) where warning_message is None if no warning
+
+    Raises:
+        TypeError: If the data cannot be extracted or is invalid
+    """
     documents: list[DoclingDocument] = []
+    warning_message: str | None = None
+
     if isinstance(data_inputs, DataFrame):
         if not len(data_inputs):
             msg = "DataFrame is empty"
             raise TypeError(msg)
 
-… (8 removed lines not captured in this view)
+        # Primary: Check for exact column name match
+        if doc_key in data_inputs.columns:
+            try:
+                documents = data_inputs[doc_key].tolist()
+            except Exception as e:
+                msg = f"Error extracting DoclingDocument from DataFrame column '{doc_key}': {e}"
+                raise TypeError(msg) from e
+        else:
+            # Fallback: Search all columns for DoclingDocument objects
+            found_column = None
+            for col in data_inputs.columns:
+                try:
+                    # Check if this column contains DoclingDocument objects
+                    sample = data_inputs[col].dropna().iloc[0] if len(data_inputs[col].dropna()) > 0 else None
+                    if sample is not None and isinstance(sample, DoclingDocument):
+                        found_column = col
+                        break
+                except (IndexError, AttributeError):
+                    continue
+
+            if found_column:
+                warning_message = (
+                    f"Column '{doc_key}' not found, but found DoclingDocument objects in column '{found_column}'. "
+                    f"Using '{found_column}' instead. Consider updating the 'Doc Key' parameter."
+                )
+                logger.warning(warning_message)
+                try:
+                    documents = data_inputs[found_column].tolist()
+                except Exception as e:
+                    msg = f"Error extracting DoclingDocument from DataFrame column '{found_column}': {e}"
+                    raise TypeError(msg) from e
+            else:
+                # Provide helpful error message
+                available_columns = list(data_inputs.columns)
+                msg = (
+                    f"Column '{doc_key}' not found in DataFrame. "
+                    f"Available columns: {available_columns}. "
+                    f"\n\nPossible solutions:\n"
+                    f"1. Use the 'Data' output from Docling component instead of 'DataFrame' output\n"
+                    f"2. Update the 'Doc Key' parameter to match one of the available columns\n"
+                    f"3. If using VLM pipeline, try using the standard pipeline"
+                )
+                raise TypeError(msg)
     else:
         if not data_inputs:
             msg = "No data inputs provided"
@@ -69,7 +117,7 @@ def extract_docling_documents(data_inputs: Data | list[Data] | DataFrame, doc_ke
         except AttributeError as e:
             msg = f"Invalid input type in collection: {e}"
             raise TypeError(msg) from e
-    return documents
+    return documents, warning_message
 
 
 def _unwrap_secrets(obj):
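The rewritten `extract_docling_documents` now returns a `(documents, warning_message)` tuple and, when `doc_key` is missing, falls back to scanning every DataFrame column for `DoclingDocument` objects. A minimal caller sketch, assuming the lfx and docling packages are installed; the empty `DoclingDocument` is built directly only to exercise the API:

```python
from docling_core.types.doc import DoclingDocument

from lfx.base.data.docling_utils import extract_docling_documents
from lfx.schema.dataframe import DataFrame

doc = DoclingDocument(name="example")   # empty document, illustration only
df = DataFrame({"document": [doc]})     # column name differs from the requested doc_key

# Callers must now unpack two values; the fallback scan finds the "document" column.
documents, warning = extract_docling_documents(df, doc_key="doc")
if warning:
    print(warning)                      # explains which column was used instead
print(len(documents))                   # -> 1
```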
@@ -101,6 +149,81 @@ def _deserialize_pydantic_model(data: dict):
     return adapter.validate_python(data["config"])
 
 
+# Global cache for DocumentConverter instances
+# This cache persists across multiple runs and thread invocations
+@lru_cache(maxsize=4)
+def _get_cached_converter(
+    pipeline: str,
+    ocr_engine: str,
+    *,
+    do_picture_classification: bool,
+    pic_desc_config_hash: str | None,
+):
+    """Create and cache a DocumentConverter instance based on configuration.
+
+    This function uses LRU caching to maintain DocumentConverter instances in memory,
+    eliminating the 15-20 minute model loading time on subsequent runs.
+
+    Args:
+        pipeline: The pipeline type ("standard" or "vlm")
+        ocr_engine: The OCR engine to use
+        do_picture_classification: Whether to enable picture classification
+        pic_desc_config_hash: Hash of the picture description config (for cache key)
+
+    Returns:
+        A cached or newly created DocumentConverter instance
+    """
+    from docling.datamodel.base_models import InputFormat
+    from docling.datamodel.pipeline_options import OcrOptions, PdfPipelineOptions, VlmPipelineOptions
+    from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
+    from docling.models.factories import get_ocr_factory
+    from docling.pipeline.vlm_pipeline import VlmPipeline
+
+    logger.info(f"Creating DocumentConverter for pipeline={pipeline}, ocr_engine={ocr_engine}")
+
+    # Configure the standard PDF pipeline
+    def _get_standard_opts() -> PdfPipelineOptions:
+        pipeline_options = PdfPipelineOptions()
+        pipeline_options.do_ocr = ocr_engine not in {"", "None"}
+        if pipeline_options.do_ocr:
+            ocr_factory = get_ocr_factory(
+                allow_external_plugins=False,
+            )
+            ocr_options: OcrOptions = ocr_factory.create_options(
+                kind=ocr_engine,
+            )
+            pipeline_options.ocr_options = ocr_options
+
+        pipeline_options.do_picture_classification = do_picture_classification
+
+        # Note: pic_desc_config_hash is for cache key only
+        # Actual picture description is handled separately (non-cached path)
+        _ = pic_desc_config_hash  # Mark as intentionally unused
+
+        return pipeline_options
+
+    # Configure the VLM pipeline
+    def _get_vlm_opts() -> VlmPipelineOptions:
+        return VlmPipelineOptions()
+
+    if pipeline == "standard":
+        pdf_format_option = PdfFormatOption(
+            pipeline_options=_get_standard_opts(),
+        )
+    elif pipeline == "vlm":
+        pdf_format_option = PdfFormatOption(pipeline_cls=VlmPipeline, pipeline_options=_get_vlm_opts())
+    else:
+        msg = f"Unknown pipeline: {pipeline!r}"
+        raise ValueError(msg)
+
+    format_options: dict[InputFormat, FormatOption] = {
+        InputFormat.PDF: pdf_format_option,
+        InputFormat.IMAGE: pdf_format_option,
+    }
+
+    return DocumentConverter(format_options=format_options)
+
+
 def docling_worker(
     *,
     file_paths: list[str],
@@ -111,7 +234,12 @@ def docling_worker(
     pic_desc_config: dict | None,
     pic_desc_prompt: str,
 ):
-    """Worker function for processing files with Docling…
+    """Worker function for processing files with Docling using threading.
+
+    This function now uses a globally cached DocumentConverter instance,
+    significantly reducing processing time on subsequent runs from 15-20 minutes
+    to just seconds.
+    """
     # Signal handling for graceful shutdown
     shutdown_requested = False
 
@@ -154,12 +282,12 @@
         check_shutdown()
 
         try:
-            from docling.datamodel.base_models import ConversionStatus, InputFormat
-            from docling.datamodel.pipeline_options import OcrOptions, PdfPipelineOptions, VlmPipelineOptions
-            from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
-            from docling.models.factories import get_ocr_factory
-            from docling.pipeline.vlm_pipeline import VlmPipeline
-            from langchain_docling.picture_description import PictureDescriptionLangChainOptions
+            from docling.datamodel.base_models import ConversionStatus, InputFormat  # noqa: F401
+            from docling.datamodel.pipeline_options import OcrOptions, PdfPipelineOptions, VlmPipelineOptions  # noqa: F401
+            from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption  # noqa: F401
+            from docling.models.factories import get_ocr_factory  # noqa: F401
+            from docling.pipeline.vlm_pipeline import VlmPipeline  # noqa: F401
+            from langchain_docling.picture_description import PictureDescriptionLangChainOptions  # noqa: F401
 
             # Check for shutdown after imports
             check_shutdown()
@@ -182,27 +310,34 @@
             queue.put({"error": "Worker interrupted during imports", "shutdown": True})
             return
 
-… (removed lines not fully captured in this view)
+    # Use cached converter instead of creating new one each time
+    # This is the key optimization that eliminates 15-20 minute model load times
+    def _get_converter() -> DocumentConverter:
         check_shutdown()  # Check before heavy operations
 
-        if pipeline_options.do_ocr:
-            ocr_factory = get_ocr_factory(
-                allow_external_plugins=False,
-            )
-            ocr_options: OcrOptions = ocr_factory.create_options(
-                kind=ocr_engine,
-            )
-            pipeline_options.ocr_options = ocr_options
-
-        pipeline_options.do_picture_classification = do_picture_classification
-
+        # For now, we don't support pic_desc_config caching due to serialization complexity
+        # This is a known limitation that can be addressed in a future enhancement
         if pic_desc_config:
+            logger.warning(
+                "Picture description with LLM is not yet supported with cached converters. "
+                "Using non-cached converter for this request."
+            )
+            # Fall back to creating a new converter (old behavior)
+            from docling.datamodel.base_models import InputFormat
+            from docling.datamodel.pipeline_options import PdfPipelineOptions
+            from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
+            from docling.models.factories import get_ocr_factory
+            from langchain_docling.picture_description import PictureDescriptionLangChainOptions
+
+            pipeline_options = PdfPipelineOptions()
+            pipeline_options.do_ocr = ocr_engine not in {"", "None"}
+            if pipeline_options.do_ocr:
+                ocr_factory = get_ocr_factory(allow_external_plugins=False)
+                ocr_options = ocr_factory.create_options(kind=ocr_engine)
+                pipeline_options.ocr_options = ocr_options
+
+            pipeline_options.do_picture_classification = do_picture_classification
             pic_desc_llm = _deserialize_pydantic_model(pic_desc_config)
             logger.info("Docling enabling the picture description stage.")
             pipeline_options.do_picture_description = True
             pipeline_options.allow_external_plugins = True
@@ -210,33 +345,24 @@
                 llm=pic_desc_llm,
                 prompt=pic_desc_prompt,
             )
-        return pipeline_options
 
-… (18 removed lines not captured in this view)
-            raise ValueError(msg)
-
-        format_options: dict[InputFormat, FormatOption] = {
-            InputFormat.PDF: pdf_format_option,
-            InputFormat.IMAGE: pdf_format_option,
-        }
-
-        return DocumentConverter(format_options=format_options)
+            pdf_format_option = PdfFormatOption(pipeline_options=pipeline_options)
+            format_options: dict[InputFormat, FormatOption] = {
+                InputFormat.PDF: pdf_format_option,
+                InputFormat.IMAGE: pdf_format_option,
+            }
+            return DocumentConverter(format_options=format_options)
+
+        # Use cached converter - this is where the magic happens!
+        # First run: creates and caches converter (15-20 min)
+        # Subsequent runs: reuses cached converter (seconds)
+        pic_desc_config_hash = None  # Will be None since we checked above
+        return _get_cached_converter(
+            pipeline=pipeline,
+            ocr_engine=ocr_engine,
+            do_picture_classification=do_picture_classification,
+            pic_desc_config_hash=pic_desc_config_hash,
+        )
 
     try:
         # Check for shutdown before creating converter (can be slow)
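The hunk above keys `_get_cached_converter` on four hashable arguments via `functools.lru_cache(maxsize=4)`, so a second run with the same pipeline and OCR settings reuses the already-built `DocumentConverter` instead of reloading models. A self-contained sketch of that caching pattern with a stand-in for the slow constructor (the engine name and the sleep are illustrative only, not part of the diff):

```python
from functools import lru_cache
import time

@lru_cache(maxsize=4)  # same decorator the diff applies to _get_cached_converter
def get_converter(pipeline: str, ocr_engine: str, *, do_picture_classification: bool):
    time.sleep(2)       # stand-in for the slow model loading a real DocumentConverter does
    return object()     # stand-in for the converter instance

first = get_converter("standard", "easyocr", do_picture_classification=False)   # slow first call
second = get_converter("standard", "easyocr", do_picture_classification=False)  # cache hit
assert first is second
print(get_converter.cache_info())  # CacheInfo(hits=1, misses=1, maxsize=4, currsize=1)
```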
lfx/base/data/storage_utils.py
CHANGED
@@ -190,3 +190,112 @@ def file_exists(file_path: str, storage_service: StorageService | None = None) -
         return False
     else:
         return True
+
+
+# Magic bytes signatures for common image formats
+MIN_IMAGE_HEADER_SIZE = 12  # Minimum bytes needed to detect image type
+
+IMAGE_SIGNATURES: dict[str, list[tuple[bytes, int]]] = {
+    "jpeg": [(b"\xff\xd8\xff", 0)],
+    "jpg": [(b"\xff\xd8\xff", 0)],
+    "png": [(b"\x89PNG\r\n\x1a\n", 0)],
+    "gif": [(b"GIF87a", 0), (b"GIF89a", 0)],
+    "webp": [(b"RIFF", 0)],  # WebP starts with RIFF, then has WEBP at offset 8
+    "bmp": [(b"BM", 0)],
+    "tiff": [(b"II*\x00", 0), (b"MM\x00*", 0)],  # Little-endian and big-endian TIFF
+}
+
+
+def detect_image_type_from_bytes(content: bytes) -> str | None:
+    """Detect the actual image type from file content using magic bytes.
+
+    Args:
+        content: The file content bytes (at least first 12 bytes needed)
+
+    Returns:
+        str | None: The detected image type (e.g., "jpeg", "png") or None if not recognized
+    """
+    if len(content) < MIN_IMAGE_HEADER_SIZE:
+        return None
+
+    # Check WebP specifically (needs to check both RIFF and WEBP)
+    if content[:4] == b"RIFF" and content[8:12] == b"WEBP":
+        return "webp"
+
+    # Check other image signatures
+    for image_type, signatures in IMAGE_SIGNATURES.items():
+        if image_type == "webp":
+            continue  # Already handled above
+        for signature, offset in signatures:
+            if content[offset : offset + len(signature)] == signature:
+                return image_type
+
+    return None
+
+
+def validate_image_content_type(
+    file_path: str,
+    content: bytes | None = None,
+    storage_service: StorageService | None = None,
+    resolve_path: Callable[[str], str] | None = None,
+) -> tuple[bool, str | None]:
+    """Validate that an image file's content matches its declared extension.
+
+    This prevents errors like "Image does not match the provided media type image/png"
+    when a JPEG file is saved with a .png extension.
+
+    Only rejects files when we can definitively detect a mismatch. Files with
+    unrecognized content are allowed through (they may fail later, but that's
+    better than false positives blocking valid files).
+
+    Args:
+        file_path: Path to the image file
+        content: Optional pre-read file content bytes. If not provided, will read from file.
+        storage_service: Optional storage service instance for S3 files
+        resolve_path: Optional function to resolve relative paths
+
+    Returns:
+        tuple[bool, str | None]: (is_valid, error_message)
+        - (True, None) if the content matches the extension, is unrecognized, or file is not an image
+        - (False, error_message) if there's a definite mismatch
+    """
+    # Get the file extension
+    path_obj = Path(file_path)
+    extension = path_obj.suffix[1:].lower() if path_obj.suffix else ""
+
+    # Only validate image files
+    image_extensions = {"jpeg", "jpg", "png", "gif", "webp", "bmp", "tiff"}
+    if extension not in image_extensions:
+        return True, None
+
+    # Read content if not provided
+    if content is None:
+        try:
+            content = run_until_complete(read_file_bytes(file_path, storage_service, resolve_path))
+        except (FileNotFoundError, ValueError):
+            # Can't read file - let it pass, will fail later with better error
+            return True, None
+
+    # Detect actual image type
+    detected_type = detect_image_type_from_bytes(content)
+
+    # If we can't detect the type, the file is not a valid image
+    if detected_type is None:
+        return False, (
+            f"File '{path_obj.name}' has extension '.{extension}' but its content "
+            f"is not a valid image format. The file may be corrupted, empty, or not a real image."
+        )
+
+    # Normalize extensions for comparison (jpg == jpeg, tif == tiff)
+    extension_normalized = "jpeg" if extension == "jpg" else extension
+    detected_normalized = "jpeg" if detected_type == "jpg" else detected_type
+
+    if extension_normalized != detected_normalized:
+        return False, (
+            f"File '{path_obj.name}' has extension '.{extension}' but contains "
+            f"'{detected_type.upper()}' image data. This mismatch will cause API errors. "
+            f"Please rename the file with the correct extension '.{detected_type}' or "
+            f"re-save it in the correct format."
+        )
+
+    return True, None
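A short usage sketch of the new magic-byte helpers added above (assumes the lfx package is importable; the byte strings are hand-built headers, not real files):

```python
from lfx.base.data.storage_utils import detect_image_type_from_bytes, validate_image_content_type

png_header = b"\x89PNG\r\n\x1a\n" + b"\x00" * 8    # 16 bytes, enough for detection
jpeg_header = b"\xff\xd8\xff\xe0" + b"\x00" * 12

print(detect_image_type_from_bytes(png_header))    # -> "png"
print(detect_image_type_from_bytes(jpeg_header))   # -> "jpeg"

# JPEG bytes saved under a .png name: validation reports the mismatch up front
# instead of letting a downstream API reject the upload.
ok, error = validate_image_content_type("photo.png", content=jpeg_header)
print(ok)      # -> False
print(error)   # -> "File 'photo.png' has extension '.png' but contains 'JPEG' image data. ..."
```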
lfx/base/datastax/astradb_base.py
CHANGED
@@ -187,34 +187,38 @@ class AstraDBBaseComponent(Component):
     @classmethod
     def map_cloud_providers(cls, token: str, environment: str | None = None) -> dict[str, dict[str, Any]]:
         """Fetch all available cloud providers and regions."""
-… (old unguarded implementation; most removed lines not captured in this view)
-        # Get the list of available regions
-        available_regions = admin_client.find_available_regions(only_org_enabled_regions=True)
-            "GCP": {"name": "Google Cloud Platform", "id": "gcp"},
-            "Azure": {"name": "Microsoft Azure", "id": "azure"},
-        }
+        try:
+            # Get the admin object
+            client = DataAPIClient(environment=cls.get_environment(environment))
+            admin_client = client.get_admin(token=token)
+
+            # Get the list of available regions
+            available_regions = admin_client.find_available_regions(only_org_enabled_regions=True)
+
+            provider_mapping: dict[str, dict[str, str]] = {
+                "AWS": {"name": "Amazon Web Services", "id": "aws"},
+                "GCP": {"name": "Google Cloud Platform", "id": "gcp"},
+                "Azure": {"name": "Microsoft Azure", "id": "azure"},
+            }
+
+            result: dict[str, dict[str, Any]] = {}
+            for region_info in available_regions:
+                cloud_provider = region_info.cloud_provider
+                region = region_info.name
+
+                if cloud_provider in provider_mapping:
+                    provider_name = provider_mapping[cloud_provider]["name"]
+                    provider_id = provider_mapping[cloud_provider]["id"]
+
+                    if provider_name not in result:
+                        result[provider_name] = {"id": provider_id, "regions": []}
+
+                    result[provider_name]["regions"].append(region)
+        except Exception as e:  # noqa: BLE001
+            logger.debug("Error fetching cloud providers: %s", e)
+            return {}
+        else:
+            return result
 
     @classmethod
     def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):
@@ -327,48 +331,52 @@ class AstraDBBaseComponent(Component):
 
     @classmethod
     def get_database_list_static(cls, token: str, environment: str | None = None):
-… (old unguarded implementation; most removed lines not captured in this view)
-        # Get the admin object
-        admin_client = client.get_admin(token=token)
-        for db in db_list:
-            try:
-                # Get the API endpoint for the database
-                api_endpoints = [db_reg.api_endpoint for db_reg in db.regions]
+        try:
+            environment = cls.get_environment(environment)
+            client = DataAPIClient(environment=environment)
+
+            # Get the admin object
+            admin_client = client.get_admin(token=token)
+
+            # Get the list of databases
+            db_list = admin_client.list_databases()
+
+            # Generate the api endpoint for each database
+            db_info_dict = {}
+            for db in db_list:
+                try:
+                    # Get the API endpoint for the database
+                    api_endpoints = [db_reg.api_endpoint for db_reg in db.regions]
+
+                    # Get the number of collections
+                    try:
+                        # Get the number of collections in the database
+                        num_collections = len(
+                            client.get_database(
+                                api_endpoints[0],
+                                token=token,
+                            ).list_collection_names()
+                        )
+                    except Exception:  # noqa: BLE001
+                        if db.status != "PENDING":
+                            continue
+                        num_collections = 0
+
+                    # Add the database to the dictionary
+                    db_info_dict[db.name] = {
+                        "api_endpoints": api_endpoints,
+                        "keyspaces": db.keyspaces,
+                        "collections": num_collections,
+                        "status": db.status if db.status != "ACTIVE" else None,
+                        "org_id": db.org_id if db.org_id else None,
+                    }
+                except Exception as e:  # noqa: BLE001
+                    logger.debug("Failed to get metadata for database %s: %s", db.name, e)
+        except Exception as e:  # noqa: BLE001
+            logger.debug("Error fetching database list: %s", e)
+            return {}
+        else:
+            return db_info_dict
 
     def get_database_list(self):
         return self.get_database_list_static(
@@ -467,6 +475,9 @@ class AstraDBBaseComponent(Component):
 
     def _initialize_database_options(self):
         try:
+            db_list = self.get_database_list()
+            if not db_list:
+                return []
             return [
                 {
                     "name": name,
@@ -476,11 +487,11 @@ class AstraDBBaseComponent(Component):
                     "keyspaces": info["keyspaces"],
                     "org_id": info["org_id"],
                 }
-                for name, info in …
+                for name, info in db_list.items()
             ]
-        except Exception as e:
-… (removed lines not captured in this view)
+        except Exception as e:  # noqa: BLE001
+            logger.debug("Error fetching database options: %s", e)
+            return []
 
     @classmethod
     def get_provider_icon(cls, collection=None, provider_name: str | None = None) -> str:
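With the new try/except wrappers, both classmethods degrade to an empty dict instead of raising when the Astra DB Data API call fails, so callers can treat a bad token or network error the same as "no databases". A hedged sketch of what a caller sees (the token is a placeholder, not a real credential):

```python
from lfx.base.datastax.astradb_base import AstraDBBaseComponent

providers = AstraDBBaseComponent.map_cloud_providers(token="AstraCS:...")  # placeholder token
if not providers:
    print("Astra DB unreachable or token invalid; got an empty mapping instead of an exception.")
else:
    for name, info in providers.items():
        print(name, info["id"], info["regions"])
```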
lfx/base/mcp/util.py
CHANGED
@@ -23,6 +23,7 @@ from pydantic import BaseModel
 from lfx.log.logger import logger
 from lfx.schema.json_schema import create_input_schema_from_json_schema
 from lfx.services.deps import get_settings_service
+from lfx.utils.async_helpers import run_until_complete
 
 HTTP_ERROR_STATUS_CODE = httpx_codes.BAD_REQUEST  # HTTP status code for client errors
 
@@ -351,8 +352,7 @@ def create_tool_func(tool_name: str, arg_schema: type[BaseModel], client) -> Cal
             _handle_tool_validation_error(e, tool_name, provided_args, arg_schema)
 
         try:
-… (removed line not captured in this view)
-            return loop.run_until_complete(client.run_tool(tool_name, arguments=validated.model_dump()))
+            return run_until_complete(client.run_tool(tool_name, arguments=validated.model_dump()))
         except Exception as e:
             logger.error(f"Tool '{tool_name}' execution failed: {e}")
             # Re-raise with more context
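The tool wrapper now hands its coroutine to the shared `run_until_complete` helper from `lfx.utils.async_helpers` instead of managing its own event loop. A small sketch of the call shape (the coroutine below is a stand-in for `client.run_tool`; the helper's internals are not part of this diff):

```python
import asyncio

from lfx.utils.async_helpers import run_until_complete  # helper the diff switches to

async def fake_tool(name: str, arguments: dict) -> dict:
    await asyncio.sleep(0)  # stand-in for an MCP tool round-trip
    return {"tool": name, "args": arguments}

# Same shape as the call in create_tool_func: pass the helper a coroutine, get its result back.
result = run_until_complete(fake_tool("search", {"query": "docs"}))
print(result)
```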
lfx/base/models/__init__.py
CHANGED
@@ -1,3 +1,13 @@
 from .model import LCModelComponent
+from .unified_models import (
+    get_model_provider_variable_mapping,
+    get_model_providers,
+    get_unified_models_detailed,
+)
 
-__all__ = […
+__all__ = [
+    "LCModelComponent",
+    "get_model_provider_variable_mapping",
+    "get_model_providers",
+    "get_unified_models_detailed",
+]