langflow-base-nightly 0.5.0.dev34__py3-none-any.whl → 0.5.0.dev36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langflow/alembic/versions/1cb603706752_modify_uniqueness_constraint_on_file_.py +279 -0
- langflow/components/agents/mcp_component.py +21 -4
- langflow/components/data/kb_ingest.py +15 -16
- langflow/components/docling/__init__.py +198 -0
- langflow/components/docling/docling_inline.py +102 -60
- langflow/components/processing/save_file.py +31 -4
- langflow/initial_setup/starter_projects/Knowledge Ingestion.json +2 -2
- langflow/initial_setup/starter_projects/News Aggregator.json +19 -2
- langflow/initial_setup/starter_projects/Nvidia Remix.json +19 -2
- langflow/services/database/models/file/model.py +4 -2
- {langflow_base_nightly-0.5.0.dev34.dist-info → langflow_base_nightly-0.5.0.dev36.dist-info}/METADATA +1 -1
- {langflow_base_nightly-0.5.0.dev34.dist-info → langflow_base_nightly-0.5.0.dev36.dist-info}/RECORD +14 -13
- {langflow_base_nightly-0.5.0.dev34.dist-info → langflow_base_nightly-0.5.0.dev36.dist-info}/WHEEL +0 -0
- {langflow_base_nightly-0.5.0.dev34.dist-info → langflow_base_nightly-0.5.0.dev36.dist-info}/entry_points.txt +0 -0
langflow/alembic/versions/1cb603706752_modify_uniqueness_constraint_on_file_.py
@@ -0,0 +1,279 @@
+"""Modify uniqueness constraint on file names
+
+Revision ID: 1cb603706752
+Revises: 3162e83e485f
+Create Date: 2025-07-24 07:02:14.896583
+
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+import time
+from typing import Sequence, Union, Iterable, Optional, Set, Tuple
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy import inspect
+
+# revision identifiers, used by Alembic.
+revision: str = "1cb603706752"
+down_revision: Union[str, None] = "3162e83e485f"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+logger = logging.getLogger(__name__)
+
+# Behavior constants
+DUPLICATE_SUFFIX_START = 2  # first suffix to use, e.g., "name_2.ext"
+BATCH_SIZE = 1000  # Process duplicates in batches for large datasets
+
+
+def _get_unique_constraints_by_columns(
+    inspector, table: str, expected_cols: Iterable[str]
+) -> Optional[str]:
+    """Return the name of a unique constraint that matches the exact set of expected columns."""
+    expected = set(expected_cols)
+    for c in inspector.get_unique_constraints(table):
+        cols = set(c.get("column_names") or [])
+        if cols == expected:
+            return c.get("name")
+    return None
+
+
+def _split_base_ext(name: str) -> Tuple[str, str]:
+    """Split a filename into (base, ext) where ext does not include the leading dot; ext may be ''."""
+    if "." in name:
+        base, ext = name.rsplit(".", 1)
+        return base, ext
+    return name, ""
+
+
+def _escape_like(s: str) -> str:
+    # escape backslash first, then SQL LIKE wildcards
+    return s.replace("\\", "\\\\").replace("%", r"\%").replace("_", r"\_")
+
+
+def _like_for_suffixes(base: str, ext: str) -> str:
+    eb = _escape_like(base)
+    if ext:
+        ex = ext.replace("%", r"\%").replace("_", r"\_")
+        return f"{eb}\\_%." + ex  # literal underscore
+    else:
+        return f"{eb}\\_%"
+
+
+def _next_available_name(conn, user_id: str, base_name: str) -> str:
+    """
+    Compute the next available non-conflicting name for a given user.
+    Handles names with or without extensions and existing _N suffixes.
+    """
+    base, ext = _split_base_ext(base_name)
+
+    # Load all sibling names once
+    rows = conn.execute(
+        sa.text("""
+            SELECT name
+            FROM file
+            WHERE user_id = :uid
+              AND (name = :base_name OR name LIKE :like ESCAPE '\\')
+        """),
+        {"uid": user_id, "base_name": base_name, "like": _like_for_suffixes(base, ext)},
+    ).scalars().all()
+
+    taken: Set[str] = set(rows)
+
+    # Pattern to detect base_N(.ext) and capture N
+    if ext:
+        rx = re.compile(rf"^{re.escape(base)}_(\d+)\.{re.escape(ext)}$")
+    else:
+        rx = re.compile(rf"^{re.escape(base)}_(\d+)$")
+
+    max_n = 1
+    for n in rows:
+        m = rx.match(n)
+        if m:
+            max_n = max(max_n, int(m.group(1)))
+
+    n = max(max_n + 1, DUPLICATE_SUFFIX_START)
+    while True:
+        candidate = f"{base}_{n}.{ext}" if ext else f"{base}_{n}"
+        if candidate not in taken:
+            return candidate
+        n += 1
+
+
+def _handle_duplicates_before_upgrade(conn) -> None:
+    """
+    Ensure (user_id, name) is unique by renaming older duplicates before adding the composite unique constraint.
+    Keeps the most recently updated/created/id-highest record; renames the rest with _N suffix.
+    """
+    logger.info("Scanning for duplicate file names per user...")
+    duplicates = conn.execute(
+        sa.text(
+            """
+            SELECT user_id, name, COUNT(*) AS cnt
+            FROM file
+            GROUP BY user_id, name
+            HAVING COUNT(*) > 1
+            """
+        )
+    ).fetchall()
+
+    if not duplicates:
+        logger.info("No duplicates found.")
+        return
+
+    logger.info("Found %d duplicate sets. Resolving...", len(duplicates))
+
+    # Add progress indicator for large datasets
+    if len(duplicates) > 100:
+        logger.info("Large number of duplicates detected. This may take several minutes...")
+
+    # Wrap in a nested transaction so we fail cleanly on any error
+    with conn.begin_nested():
+        # Process duplicates in batches for better performance on large datasets
+        for batch_start in range(0, len(duplicates), BATCH_SIZE):
+            batch_end = min(batch_start + BATCH_SIZE, len(duplicates))
+            batch = duplicates[batch_start:batch_end]
+
+            if len(duplicates) > BATCH_SIZE:
+                logger.info("Processing batch %d-%d of %d duplicate sets...",
+                            batch_start + 1, batch_end, len(duplicates))
+
+            for user_id, name, cnt in batch:
+                logger.debug("Resolving duplicates for user=%s, name=%r (count=%s)", user_id, name, cnt)
+
+                file_ids = conn.execute(
+                    sa.text(
+                        """
+                        SELECT id
+                        FROM file
+                        WHERE user_id = :uid AND name = :name
+                        ORDER BY updated_at DESC, created_at DESC, id DESC
+                        """
+                    ),
+                    {"uid": user_id, "name": name},
+                ).scalars().all()
+
+                # Keep the first (most recent), rename the rest
+                for file_id in file_ids[1:]:
+                    new_name = _next_available_name(conn, user_id, name)
+                    conn.execute(
+                        sa.text("UPDATE file SET name = :new_name WHERE id = :fid"),
+                        {"new_name": new_name, "fid": file_id},
+                    )
+                    logger.debug("Renamed id=%s: %r -> %r", file_id, name, new_name)
+
+            # Progress update for large batches
+            if len(duplicates) > BATCH_SIZE and batch_end < len(duplicates):
+                logger.info("Completed %d of %d duplicate sets (%.1f%%)",
+                            batch_end, len(duplicates), (batch_end / len(duplicates)) * 100)
+
+    logger.info("Duplicate resolution completed.")
+
+
+def upgrade() -> None:
+    start_time = time.time()
+    logger.info("Starting upgrade: adding composite unique (name, user_id) on file")
+
+    conn = op.get_bind()
+    inspector = inspect(conn)
+
+    # 1) Resolve pre-existing duplicates so the new unique can be created
+    duplicate_start = time.time()
+    _handle_duplicates_before_upgrade(conn)
+    duplicate_duration = time.time() - duplicate_start
+
+    if duplicate_duration > 1.0:  # Only log if it took more than 1 second
+        logger.info("Duplicate resolution completed in %.2f seconds", duplicate_duration)
+
+    # 2) Detect existing single-column unique on name (if any)
+    inspector = inspect(conn)  # refresh inspector
+    single_name_uc = _get_unique_constraints_by_columns(inspector, "file", {"name"})
+    composite_uc = _get_unique_constraints_by_columns(inspector, "file", {"name", "user_id"})
+
+    # 3) Use a unified, reflection-based batch_alter_table for both Postgres and SQLite.
+    #    recreate="always" ensures a safe table rebuild on SQLite and a standard alter on Postgres.
+    constraint_start = time.time()
+    with op.batch_alter_table("file", recreate="always") as batch_op:
+        # Drop old single-column unique if present
+        if single_name_uc:
+            logger.info("Dropping existing single-column unique: %s", single_name_uc)
+            batch_op.drop_constraint(single_name_uc, type_="unique")
+
+        # Create composite unique if not already present
+        if not composite_uc:
+            logger.info("Creating composite unique: file_name_user_id_key on (name, user_id)")
+            batch_op.create_unique_constraint("file_name_user_id_key", ["name", "user_id"])
+        else:
+            logger.info("Composite unique already present: %s", composite_uc)
+
+    constraint_duration = time.time() - constraint_start
+    if constraint_duration > 1.0:  # Only log if it took more than 1 second
+        logger.info("Constraint operations completed in %.2f seconds", constraint_duration)
+
+    total_duration = time.time() - start_time
+    logger.info("Upgrade completed successfully in %.2f seconds", total_duration)
+
+
+def downgrade() -> None:
+    start_time = time.time()
+    logger.info("Starting downgrade: reverting to single-column unique on (name)")
+
+    conn = op.get_bind()
+    inspector = inspect(conn)
+
+    # 1) Ensure no cross-user duplicates on name (since we'll enforce global uniqueness on name)
+    logger.info("Checking for cross-user duplicate names prior to downgrade...")
+    validation_start = time.time()
+
+    dup_names = conn.execute(
+        sa.text(
+            """
+            SELECT name, COUNT(*) AS cnt
+            FROM file
+            GROUP BY name
+            HAVING COUNT(*) > 1
+            """
+        )
+    ).fetchall()
+
+    validation_duration = time.time() - validation_start
+    if validation_duration > 1.0:  # Only log if it took more than 1 second
+        logger.info("Validation completed in %.2f seconds", validation_duration)
+
+    if dup_names:
+        examples = [row[0] for row in dup_names[:10]]
+        raise RuntimeError(
+            "Downgrade aborted: duplicate names exist across users. "
+            f"Examples: {examples}{'...' if len(dup_names) > 10 else ''}. "
+            "Rename conflicting files before downgrading."
+        )
+
+    # 2) Detect constraints
+    inspector = inspect(conn)  # refresh
+    composite_uc = _get_unique_constraints_by_columns(inspector, "file", {"name", "user_id"})
+    single_name_uc = _get_unique_constraints_by_columns(inspector, "file", {"name"})
+
+    # 3) Perform alteration using batch with reflect to preserve other objects
+    constraint_start = time.time()
+    with op.batch_alter_table("file", recreate="always") as batch_op:
+        if composite_uc:
+            logger.info("Dropping composite unique: %s", composite_uc)
+            batch_op.drop_constraint(composite_uc, type_="unique")
+        else:
+            logger.info("No composite unique found to drop.")
+
+        if not single_name_uc:
+            logger.info("Creating single-column unique: file_name_key on (name)")
+            batch_op.create_unique_constraint("file_name_key", ["name"])
+        else:
+            logger.info("Single-column unique already present: %s", single_name_uc)
+
+    constraint_duration = time.time() - constraint_start
+    if constraint_duration > 1.0:  # Only log if it took more than 1 second
+        logger.info("Constraint operations completed in %.2f seconds", constraint_duration)
+
+    total_duration = time.time() - start_time
+    logger.info("Downgrade completed successfully in %.2f seconds", total_duration)
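The core of this migration is the _N-suffix renaming applied to per-user duplicates before the composite unique constraint on (name, user_id) is created: the most recently updated record keeps its name, older copies get the smallest free suffix starting at 2. A minimal standalone sketch of that naming scheme follows; the next_available_name helper and the example names are illustrative only, not part of the package.

import re

def next_available_name(existing: set[str], base_name: str, start: int = 2) -> str:
    """Return base_name with the smallest free _N suffix, mirroring the migration's scheme."""
    base, ext = base_name.rsplit(".", 1) if "." in base_name else (base_name, "")
    suffix = rf"\.{re.escape(ext)}$" if ext else "$"
    rx = re.compile(rf"^{re.escape(base)}_(\d+)" + suffix)
    # Highest existing _N suffix among sibling names (default 1 when none exist)
    max_n = max((int(m.group(1)) for m in map(rx.match, existing) if m), default=1)
    n = max(max_n + 1, start)
    while True:
        candidate = f"{base}_{n}.{ext}" if ext else f"{base}_{n}"
        if candidate not in existing:
            return candidate
        n += 1

# A second "report.pdf" for the same user becomes "report_2.pdf", the next one "report_3.pdf".
print(next_available_name({"report.pdf"}, "report.pdf"))                   # report_2.pdf
print(next_available_name({"report.pdf", "report_2.pdf"}, "report.pdf"))   # report_3.pdf

The migration itself additionally scopes the candidate search to a single user via the LIKE query shown above, so the same name may now exist for different users.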
langflow/components/agents/mcp_component.py
@@ -16,14 +16,14 @@ from langflow.base.mcp.util import (
 )
 from langflow.custom.custom_component.component_with_cache import ComponentWithCache
 from langflow.inputs.inputs import InputTypes  # noqa: TC001
-from langflow.io import DropdownInput, McpInput, MessageTextInput, Output
+from langflow.io import DropdownInput, McpInput, MessageTextInput, Output, SecretStrInput
 from langflow.io.schema import flatten_schema, schema_to_langflow_inputs
 from langflow.logging import logger
 from langflow.schema.dataframe import DataFrame
 from langflow.schema.message import Message
-from langflow.services.auth.utils import create_user_longterm_token

 # Import get_server from the backend API
+from langflow.services.auth.utils import create_user_longterm_token, get_current_user
 from langflow.services.database.models.user.crud import get_user_by_id
 from langflow.services.deps import get_session, get_settings_service, get_storage_service

@@ -96,6 +96,13 @@ class MCPToolsComponent(ComponentWithCache):
             show=False,
             tool_mode=False,
         ),
+        SecretStrInput(
+            name="api_key",
+            display_name="Langflow API Key",
+            info="Langflow API key for authentication when fetching MCP servers and tools.",
+            required=False,
+            advanced=True,
+        ),
     ]

     outputs = [
@@ -155,8 +162,18 @@ class MCPToolsComponent(ComponentWithCache):

         try:
             async for db in get_session():
-                user_id, _ = await create_user_longterm_token(db)
-                current_user = await get_user_by_id(db, user_id)
+                # TODO: In 1.6, this may need to be removed or adjusted
+                # Try to get the super user token, if possible
+                if self.api_key:
+                    current_user = await get_current_user(
+                        token=None,
+                        query_param=self.api_key,
+                        header_param=None,
+                        db=db,
+                    )
+                else:
+                    user_id, _ = await create_user_longterm_token(db)
+                    current_user = await get_user_by_id(db, user_id)

                 # Try to get server config from DB/API
                 server_config = await get_server(
langflow/components/data/kb_ingest.py
@@ -139,8 +139,8 @@ class KBIngestionComponent(Component):
             {
                 "column_name": "text",
                 "vectorize": True,
-                "identifier":
-            }
+                "identifier": True,
+            },
         ],
     ),
     IntInput(
@@ -187,9 +187,8 @@ class KBIngestionComponent(Component):
         df_columns = set(df_source.columns)
         for config in config_list:
            col_name = config.get("column_name")
-            if col_name not in df_columns
+            if col_name not in df_columns:
                 msg = f"Column '{col_name}' not found in DataFrame. Available columns: {sorted(df_columns)}"
-                self.log(f"Warning: {msg}")
                 raise ValueError(msg)

         return config_list
@@ -295,9 +294,7 @@ class KBIngestionComponent(Component):
             if not cfg_path.exists():
                 cfg_path.write_text(json.dumps(config_list, indent=2))

-        except
-            if not self.silent_errors:
-                raise
+        except (OSError, TypeError, ValueError) as e:
             self.log(f"Error saving KB files: {e}")

     def _build_column_metadata(self, config_list: list[dict[str, Any]], df_source: pd.DataFrame) -> dict[str, Any]:
@@ -367,9 +364,7 @@ class KBIngestionComponent(Component):
             chroma.add_documents(documents)
             self.log(f"Added {len(documents)} documents to vector store '{self.knowledge_base}'")

-        except
-            if not self.silent_errors:
-                raise
+        except (OSError, ValueError, RuntimeError) as e:
             self.log(f"Error creating vector store: {e}")

     def _convert_df_to_data_objects(self, df_source: pd.DataFrame, config_list: list[dict[str, Any]]) -> list[Data]:
@@ -407,16 +402,22 @@ class KBIngestionComponent(Component):

         # Convert each row to a Data object
         for _, row in df_source.iterrows():
-            # Build content text from
-
+            # Build content text from identifier columns using list comprehension
+            identifier_parts = [str(row[col]) for col in content_cols if col in row and pd.notna(row[col])]

-
+            # Join all parts into a single string
+            page_content = " ".join(identifier_parts)

             # Build metadata from NON-vectorized columns only (simple key-value pairs)
             data_dict = {
                 "text": page_content,  # Main content for vectorization
             }

+            # Add identifier columns if they exist
+            if identifier_cols:
+                identifier_parts = [str(row[col]) for col in identifier_cols if col in row and pd.notna(row[col])]
+                page_content = " ".join(identifier_parts)
+
             # Add metadata columns as simple key-value pairs
             for col in df_source.columns:
                 if col not in content_cols and col in row and pd.notna(row[col]):
@@ -526,9 +527,7 @@ class KBIngestionComponent(Component):

             return Data(data=meta)

-        except
-            if not self.silent_errors:
-                raise
+        except (OSError, ValueError, RuntimeError, KeyError) as e:
             self.log(f"Error in KB ingestion: {e}")
             self.status = f"❌ KB ingestion failed: {e}"
             return Data(data={"error": str(e), "kb_name": self.knowledge_base})
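For reference, the corrected default above implies a column configuration of the following shape, and the fixed validation simply checks each configured column against the incoming DataFrame. A small standalone sketch; the sample DataFrame is hypothetical and not taken from the component.

import pandas as pd

# Per-column configuration in the shape the component's default now uses
config_list = [
    {"column_name": "text", "vectorize": True, "identifier": True},
]

df_source = pd.DataFrame({"text": ["Revenue grew 12%"], "year": [2024]})

# Mirrors the corrected check: every configured column must exist in the DataFrame,
# otherwise a ValueError listing the available columns is raised.
df_columns = set(df_source.columns)
for config in config_list:
    col_name = config.get("column_name")
    if col_name not in df_columns:
        msg = f"Column '{col_name}' not found in DataFrame. Available columns: {sorted(df_columns)}"
        raise ValueError(msg)
print("Column configuration is valid")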
langflow/components/docling/__init__.py
@@ -1,7 +1,13 @@
 from __future__ import annotations

+import signal
+import sys
+import traceback
+from contextlib import suppress
 from typing import TYPE_CHECKING, Any

+from loguru import logger
+
 from langflow.components._importing import import_mod

 if TYPE_CHECKING:
@@ -41,3 +47,195 @@ def __getattr__(attr_name: str) -> Any:

 def __dir__() -> list[str]:
     return list(__all__)
+
+
+def docling_worker(file_paths: list[str], queue, pipeline: str, ocr_engine: str):
+    """Worker function for processing files with Docling in a separate process."""
+    # Signal handling for graceful shutdown
+    shutdown_requested = False
+
+    def signal_handler(signum: int, frame) -> None:  # noqa: ARG001
+        """Handle shutdown signals gracefully."""
+        nonlocal shutdown_requested
+        signal_names: dict[int, str] = {signal.SIGTERM: "SIGTERM", signal.SIGINT: "SIGINT"}
+        signal_name = signal_names.get(signum, f"signal {signum}")
+
+        logger.debug(f"Docling worker received {signal_name}, initiating graceful shutdown...")
+        shutdown_requested = True
+
+        # Send shutdown notification to parent process
+        with suppress(Exception):
+            queue.put({"error": f"Worker interrupted by {signal_name}", "shutdown": True})
+
+        # Exit gracefully
+        sys.exit(0)
+
+    def check_shutdown() -> None:
+        """Check if shutdown was requested and exit if so."""
+        if shutdown_requested:
+            logger.info("Shutdown requested, exiting worker...")
+
+            with suppress(Exception):
+                queue.put({"error": "Worker shutdown requested", "shutdown": True})
+
+            sys.exit(0)
+
+    # Register signal handlers early
+    try:
+        signal.signal(signal.SIGTERM, signal_handler)
+        signal.signal(signal.SIGINT, signal_handler)
+        logger.debug("Signal handlers registered for graceful shutdown")
+    except (OSError, ValueError) as e:
+        # Some signals might not be available on all platforms
+        logger.warning(f"Warning: Could not register signal handlers: {e}")
+
+    # Check for shutdown before heavy imports
+    check_shutdown()
+
+    try:
+        from docling.datamodel.base_models import ConversionStatus, InputFormat
+        from docling.datamodel.pipeline_options import (
+            OcrOptions,
+            PdfPipelineOptions,
+            VlmPipelineOptions,
+        )
+        from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
+        from docling.models.factories import get_ocr_factory
+        from docling.pipeline.vlm_pipeline import VlmPipeline
+
+        # Check for shutdown after imports
+        check_shutdown()
+        logger.debug("Docling dependencies loaded successfully")
+
+    except ModuleNotFoundError:
+        msg = (
+            "Docling is an optional dependency of Langflow. "
+            "Install with `uv pip install 'langflow[docling]'` "
+            "or refer to the documentation"
+        )
+        queue.put({"error": msg})
+        return
+    except ImportError as e:
+        # A different import failed (e.g., a transitive dependency); preserve details.
+        queue.put({"error": f"Failed to import a Docling dependency: {e}"})
+        return
+    except KeyboardInterrupt:
+        logger.warning("KeyboardInterrupt during imports, exiting...")
+        queue.put({"error": "Worker interrupted during imports", "shutdown": True})
+        return
+
+    # Configure the standard PDF pipeline
+    def _get_standard_opts() -> PdfPipelineOptions:
+        check_shutdown()  # Check before heavy operations
+
+        pipeline_options = PdfPipelineOptions()
+        pipeline_options.do_ocr = ocr_engine != ""
+        if pipeline_options.do_ocr:
+            ocr_factory = get_ocr_factory(
+                allow_external_plugins=False,
+            )
+
+            ocr_options: OcrOptions = ocr_factory.create_options(
+                kind=ocr_engine,
+            )
+            pipeline_options.ocr_options = ocr_options
+        return pipeline_options
+
+    # Configure the VLM pipeline
+    def _get_vlm_opts() -> VlmPipelineOptions:
+        check_shutdown()  # Check before heavy operations
+        return VlmPipelineOptions()
+
+    # Configure the main format options and create the DocumentConverter()
+    def _get_converter() -> DocumentConverter:
+        check_shutdown()  # Check before heavy operations
+
+        if pipeline == "standard":
+            pdf_format_option = PdfFormatOption(
+                pipeline_options=_get_standard_opts(),
+            )
+        elif pipeline == "vlm":
+            pdf_format_option = PdfFormatOption(pipeline_cls=VlmPipeline, pipeline_options=_get_vlm_opts())
+        else:
+            msg = f"Unknown pipeline: {pipeline!r}"
+            raise ValueError(msg)
+
+        format_options: dict[InputFormat, FormatOption] = {
+            InputFormat.PDF: pdf_format_option,
+            InputFormat.IMAGE: pdf_format_option,
+        }
+
+        return DocumentConverter(format_options=format_options)
+
+    try:
+        # Check for shutdown before creating converter (can be slow)
+        check_shutdown()
+        logger.info(f"Initializing {pipeline} pipeline with OCR: {ocr_engine or 'disabled'}")
+
+        converter = _get_converter()
+
+        # Check for shutdown before processing files
+        check_shutdown()
+        logger.info(f"Starting to process {len(file_paths)} files...")
+
+        # Process files with periodic shutdown checks
+        results = []
+        for i, file_path in enumerate(file_paths):
+            # Check for shutdown before processing each file
+            check_shutdown()
+
+            logger.debug(f"Processing file {i + 1}/{len(file_paths)}: {file_path}")
+
+            try:
+                # Process single file (we can't easily interrupt convert_all)
+                single_result = converter.convert_all([file_path])
+                results.extend(single_result)
+
+                # Check for shutdown after each file
+                check_shutdown()
+
+            except (OSError, ValueError, RuntimeError, ImportError) as file_error:
+                # Handle specific file processing errors
+                logger.error(f"Error processing file {file_path}: {file_error}")
+                # Continue with other files, but check for shutdown
+                check_shutdown()
+            except Exception as file_error:  # noqa: BLE001
+                # Catch any other unexpected errors to prevent worker crash
+                logger.error(f"Unexpected error processing file {file_path}: {file_error}")
+                # Continue with other files, but check for shutdown
+                check_shutdown()
+
+        # Final shutdown check before sending results
+        check_shutdown()
+
+        # Process the results while maintaining the original structure
+        processed_data = [
+            {"document": res.document, "file_path": str(res.input.file), "status": res.status.name}
+            if res.status == ConversionStatus.SUCCESS
+            else None
+            for res in results
+        ]
+
+        logger.info(f"Successfully processed {len([d for d in processed_data if d])} files")
+        queue.put(processed_data)
+
+    except KeyboardInterrupt:
+        logger.warning("KeyboardInterrupt during processing, exiting gracefully...")
+        queue.put({"error": "Worker interrupted during processing", "shutdown": True})
+        return
+    except Exception as e:  # noqa: BLE001
+        if shutdown_requested:
+            logger.exception("Exception occurred during shutdown, exiting...")
+            return
+
+        # Send any processing error to the main process with traceback
+        error_info = {"error": str(e), "traceback": traceback.format_exc()}
+        logger.error(f"Error in worker: {error_info}")
+        queue.put(error_info)
+    finally:
+        logger.info("Docling worker finishing...")
+        # Ensure we don't leave any hanging processes
+        if shutdown_requested:
+            logger.debug("Worker shutdown completed")
+        else:
+            logger.debug("Worker completed normally")
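docling_worker is written to run in a child process and to report either per-file results or an error dict back through a queue, shutting down cleanly on SIGTERM/SIGINT. A hedged sketch of how a parent process might drive it; the real call site lives in docling_inline.py and may differ, the input path is hypothetical, and the Docling extra must be installed (`uv pip install 'langflow[docling]'`).

import multiprocessing as mp

from langflow.components.docling import docling_worker

if __name__ == "__main__":
    queue = mp.Queue()
    proc = mp.Process(
        target=docling_worker,
        kwargs={
            "file_paths": ["example.pdf"],  # hypothetical input file
            "queue": queue,
            "pipeline": "standard",         # or "vlm"
            "ocr_engine": "",               # empty string disables OCR
        },
    )
    proc.start()
    result = queue.get()   # list of {"document", "file_path", "status"} entries, or {"error": ...}
    proc.join(timeout=30)
    if proc.is_alive():
        proc.terminate()   # the worker traps SIGTERM and exits gracefully
    print(result)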