langflow-base-nightly 0.5.0.dev34__py3-none-any.whl → 0.5.0.dev36__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
@@ -0,0 +1,279 @@
+"""Modify uniqueness constraint on file names
+
+Revision ID: 1cb603706752
+Revises: 3162e83e485f
+Create Date: 2025-07-24 07:02:14.896583
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+import time
+from typing import Sequence, Union, Iterable, Optional, Set, Tuple
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy import inspect
+
+# revision identifiers, used by Alembic.
+revision: str = "1cb603706752"
+down_revision: Union[str, None] = "3162e83e485f"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+logger = logging.getLogger(__name__)
+
+# Behavior constants
+DUPLICATE_SUFFIX_START = 2  # first suffix to use, e.g., "name_2.ext"
+BATCH_SIZE = 1000  # Process duplicates in batches for large datasets
+
+
+def _get_unique_constraints_by_columns(
+    inspector, table: str, expected_cols: Iterable[str]
+) -> Optional[str]:
+    """Return the name of a unique constraint that matches the exact set of expected columns."""
+    expected = set(expected_cols)
+    for c in inspector.get_unique_constraints(table):
+        cols = set(c.get("column_names") or [])
+        if cols == expected:
+            return c.get("name")
+    return None
+
+
+def _split_base_ext(name: str) -> Tuple[str, str]:
+    """Split a filename into (base, ext) where ext does not include the leading dot; ext may be ''."""
+    if "." in name:
+        base, ext = name.rsplit(".", 1)
+        return base, ext
+    return name, ""
+
+
+def _escape_like(s: str) -> str:
+    # escape backslash first, then SQL LIKE wildcards
+    return s.replace("\\", "\\\\").replace("%", r"\%").replace("_", r"\_")
+
+
+def _like_for_suffixes(base: str, ext: str) -> str:
+    eb = _escape_like(base)
+    if ext:
+        ex = ext.replace("%", r"\%").replace("_", r"\_")
+        return f"{eb}\\_%." + ex  # literal underscore
+    else:
+        return f"{eb}\\_%"
+
+
+def _next_available_name(conn, user_id: str, base_name: str) -> str:
+    """
+    Compute the next available non-conflicting name for a given user.
+    Handles names with or without extensions and existing _N suffixes.
+    """
+    base, ext = _split_base_ext(base_name)
+
+    # Load all sibling names once
+    rows = conn.execute(
+        sa.text("""
+            SELECT name
+            FROM file
+            WHERE user_id = :uid
+              AND (name = :base_name OR name LIKE :like ESCAPE '\\')
+        """),
+        {"uid": user_id, "base_name": base_name, "like": _like_for_suffixes(base, ext)},
+    ).scalars().all()
+
+    taken: Set[str] = set(rows)
+
+    # Pattern to detect base_N(.ext) and capture N
+    if ext:
+        rx = re.compile(rf"^{re.escape(base)}_(\d+)\.{re.escape(ext)}$")
+    else:
+        rx = re.compile(rf"^{re.escape(base)}_(\d+)$")
+
+    max_n = 1
+    for n in rows:
+        m = rx.match(n)
+        if m:
+            max_n = max(max_n, int(m.group(1)))
+
+    n = max(max_n + 1, DUPLICATE_SUFFIX_START)
+    while True:
+        candidate = f"{base}_{n}.{ext}" if ext else f"{base}_{n}"
+        if candidate not in taken:
+            return candidate
+        n += 1
+
+
+def _handle_duplicates_before_upgrade(conn) -> None:
+    """
+    Ensure (user_id, name) is unique by renaming older duplicates before adding the composite unique constraint.
+    Keeps the most recently updated/created/id-highest record; renames the rest with _N suffix.
+    """
+    logger.info("Scanning for duplicate file names per user...")
+    duplicates = conn.execute(
+        sa.text(
+            """
+            SELECT user_id, name, COUNT(*) AS cnt
+            FROM file
+            GROUP BY user_id, name
+            HAVING COUNT(*) > 1
+            """
+        )
+    ).fetchall()
+
+    if not duplicates:
+        logger.info("No duplicates found.")
+        return
+
+    logger.info("Found %d duplicate sets. Resolving...", len(duplicates))
+
+    # Add progress indicator for large datasets
+    if len(duplicates) > 100:
+        logger.info("Large number of duplicates detected. This may take several minutes...")
+
+    # Wrap in a nested transaction so we fail cleanly on any error
+    with conn.begin_nested():
+        # Process duplicates in batches for better performance on large datasets
+        for batch_start in range(0, len(duplicates), BATCH_SIZE):
+            batch_end = min(batch_start + BATCH_SIZE, len(duplicates))
+            batch = duplicates[batch_start:batch_end]
+
+            if len(duplicates) > BATCH_SIZE:
+                logger.info("Processing batch %d-%d of %d duplicate sets...",
+                            batch_start + 1, batch_end, len(duplicates))
+
+            for user_id, name, cnt in batch:
+                logger.debug("Resolving duplicates for user=%s, name=%r (count=%s)", user_id, name, cnt)
+
+                file_ids = conn.execute(
+                    sa.text(
+                        """
+                        SELECT id
+                        FROM file
+                        WHERE user_id = :uid AND name = :name
+                        ORDER BY updated_at DESC, created_at DESC, id DESC
+                        """
+                    ),
+                    {"uid": user_id, "name": name},
+                ).scalars().all()
+
+                # Keep the first (most recent), rename the rest
+                for file_id in file_ids[1:]:
+                    new_name = _next_available_name(conn, user_id, name)
+                    conn.execute(
+                        sa.text("UPDATE file SET name = :new_name WHERE id = :fid"),
+                        {"new_name": new_name, "fid": file_id},
+                    )
+                    logger.debug("Renamed id=%s: %r -> %r", file_id, name, new_name)
+
+            # Progress update for large batches
+            if len(duplicates) > BATCH_SIZE and batch_end < len(duplicates):
+                logger.info("Completed %d of %d duplicate sets (%.1f%%)",
+                            batch_end, len(duplicates), (batch_end / len(duplicates)) * 100)
+
+    logger.info("Duplicate resolution completed.")
+
+
+def upgrade() -> None:
+    start_time = time.time()
+    logger.info("Starting upgrade: adding composite unique (name, user_id) on file")
+
+    conn = op.get_bind()
+    inspector = inspect(conn)
+
+    # 1) Resolve pre-existing duplicates so the new unique can be created
+    duplicate_start = time.time()
+    _handle_duplicates_before_upgrade(conn)
+    duplicate_duration = time.time() - duplicate_start
+
+    if duplicate_duration > 1.0:  # Only log if it took more than 1 second
+        logger.info("Duplicate resolution completed in %.2f seconds", duplicate_duration)
+
+    # 2) Detect existing single-column unique on name (if any)
+    inspector = inspect(conn)  # refresh inspector
+    single_name_uc = _get_unique_constraints_by_columns(inspector, "file", {"name"})
+    composite_uc = _get_unique_constraints_by_columns(inspector, "file", {"name", "user_id"})
+
+    # 3) Use a unified, reflection-based batch_alter_table for both Postgres and SQLite.
+    #    recreate="always" ensures a safe table rebuild on SQLite and a standard alter on Postgres.
+    constraint_start = time.time()
+    with op.batch_alter_table("file", recreate="always") as batch_op:
+        # Drop old single-column unique if present
+        if single_name_uc:
+            logger.info("Dropping existing single-column unique: %s", single_name_uc)
+            batch_op.drop_constraint(single_name_uc, type_="unique")
+
+        # Create composite unique if not already present
+        if not composite_uc:
+            logger.info("Creating composite unique: file_name_user_id_key on (name, user_id)")
+            batch_op.create_unique_constraint("file_name_user_id_key", ["name", "user_id"])
+        else:
+            logger.info("Composite unique already present: %s", composite_uc)
+
+    constraint_duration = time.time() - constraint_start
+    if constraint_duration > 1.0:  # Only log if it took more than 1 second
+        logger.info("Constraint operations completed in %.2f seconds", constraint_duration)
+
+    total_duration = time.time() - start_time
+    logger.info("Upgrade completed successfully in %.2f seconds", total_duration)
+
+
+def downgrade() -> None:
+    start_time = time.time()
+    logger.info("Starting downgrade: reverting to single-column unique on (name)")
+
+    conn = op.get_bind()
+    inspector = inspect(conn)
+
+    # 1) Ensure no cross-user duplicates on name (since we'll enforce global uniqueness on name)
+    logger.info("Checking for cross-user duplicate names prior to downgrade...")
+    validation_start = time.time()
+
+    dup_names = conn.execute(
+        sa.text(
+            """
+            SELECT name, COUNT(*) AS cnt
+            FROM file
+            GROUP BY name
+            HAVING COUNT(*) > 1
+            """
+        )
+    ).fetchall()
+
+    validation_duration = time.time() - validation_start
+    if validation_duration > 1.0:  # Only log if it took more than 1 second
+        logger.info("Validation completed in %.2f seconds", validation_duration)
+
+    if dup_names:
+        examples = [row[0] for row in dup_names[:10]]
+        raise RuntimeError(
+            "Downgrade aborted: duplicate names exist across users. "
+            f"Examples: {examples}{'...' if len(dup_names) > 10 else ''}. "
+            "Rename conflicting files before downgrading."
+        )
+
+    # 2) Detect constraints
+    inspector = inspect(conn)  # refresh
+    composite_uc = _get_unique_constraints_by_columns(inspector, "file", {"name", "user_id"})
+    single_name_uc = _get_unique_constraints_by_columns(inspector, "file", {"name"})
+
+    # 3) Perform alteration using batch with reflect to preserve other objects
+    constraint_start = time.time()
+    with op.batch_alter_table("file", recreate="always") as batch_op:
+        if composite_uc:
+            logger.info("Dropping composite unique: %s", composite_uc)
+            batch_op.drop_constraint(composite_uc, type_="unique")
+        else:
+            logger.info("No composite unique found to drop.")
+
+        if not single_name_uc:
+            logger.info("Creating single-column unique: file_name_key on (name)")
+            batch_op.create_unique_constraint("file_name_key", ["name"])
+        else:
+            logger.info("Single-column unique already present: %s", single_name_uc)
+
+    constraint_duration = time.time() - constraint_start
+    if constraint_duration > 1.0:  # Only log if it took more than 1 second
+        logger.info("Constraint operations completed in %.2f seconds", constraint_duration)
+
+    total_duration = time.time() - start_time
+    logger.info("Downgrade completed successfully in %.2f seconds", total_duration)
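
The SQL-driven helpers in this migration are easier to follow in isolation. Below is a minimal, self-contained sketch of the same `_N` suffixing rule using only the standard library; the `taken` set stands in for the sibling names the migration loads per user, and the function name and example values are illustrative only, not code from the package.

import re


def next_available_name(base_name: str, taken: set[str]) -> str:
    """Illustrative re-implementation of the migration's _N suffix rule (not the migration code)."""
    # Split "report.pdf" into ("report", "pdf"); names without a dot get ext == "".
    if "." in base_name:
        base, ext = base_name.rsplit(".", 1)
    else:
        base, ext = base_name, ""

    # Highest _N already used among the sibling names.
    rx = re.compile(rf"^{re.escape(base)}_(\d+)\.{re.escape(ext)}$") if ext else re.compile(rf"^{re.escape(base)}_(\d+)$")
    max_n = max((int(m.group(1)) for m in map(rx.match, taken) if m), default=1)

    # Start at max_n + 1 (never below 2) and take the first candidate not already taken.
    n = max(max_n + 1, 2)
    while True:
        candidate = f"{base}_{n}.{ext}" if ext else f"{base}_{n}"
        if candidate not in taken:
            return candidate
        n += 1


print(next_available_name("report.pdf", {"report.pdf", "report_2.pdf"}))  # report_3.pdf
print(next_available_name("notes", {"notes"}))                            # notes_2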
@@ -16,14 +16,14 @@ from langflow.base.mcp.util import (
 )
 from langflow.custom.custom_component.component_with_cache import ComponentWithCache
 from langflow.inputs.inputs import InputTypes  # noqa: TC001
-from langflow.io import DropdownInput, McpInput, MessageTextInput, Output
+from langflow.io import DropdownInput, McpInput, MessageTextInput, Output, SecretStrInput
 from langflow.io.schema import flatten_schema, schema_to_langflow_inputs
 from langflow.logging import logger
 from langflow.schema.dataframe import DataFrame
 from langflow.schema.message import Message
-from langflow.services.auth.utils import create_user_longterm_token
 
 # Import get_server from the backend API
+from langflow.services.auth.utils import create_user_longterm_token, get_current_user
 from langflow.services.database.models.user.crud import get_user_by_id
 from langflow.services.deps import get_session, get_settings_service, get_storage_service
 
@@ -96,6 +96,13 @@ class MCPToolsComponent(ComponentWithCache):
             show=False,
             tool_mode=False,
         ),
+        SecretStrInput(
+            name="api_key",
+            display_name="Langflow API Key",
+            info="Langflow API key for authentication when fetching MCP servers and tools.",
+            required=False,
+            advanced=True,
+        ),
     ]
 
     outputs = [
@@ -155,8 +162,18 @@ class MCPToolsComponent(ComponentWithCache):
 
         try:
             async for db in get_session():
-                user_id, _ = await create_user_longterm_token(db)
-                current_user = await get_user_by_id(db, user_id)
+                # TODO: In 1.6, this may need to be removed or adjusted
+                # Try to get the super user token, if possible
+                if self.api_key:
+                    current_user = await get_current_user(
+                        token=None,
+                        query_param=self.api_key,
+                        header_param=None,
+                        db=db,
+                    )
+                else:
+                    user_id, _ = await create_user_longterm_token(db)
+                    current_user = await get_user_by_id(db, user_id)
 
                 # Try to get server config from DB/API
                 server_config = await get_server(
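
Stripped of the surrounding diff context, the new authentication branch amounts to the sketch below: an explicit Langflow API key, when supplied through the component's new field, takes precedence over the long-term superuser token. `resolve_current_user` is a name invented here for illustration, not a helper that exists in the package; the imports are the same ones the component uses.

# Paraphrase of the new branch, pulled out of the diff for readability.
from langflow.services.auth.utils import create_user_longterm_token, get_current_user
from langflow.services.database.models.user.crud import get_user_by_id


async def resolve_current_user(db, api_key: str | None):
    """An explicit Langflow API key wins over the long-term superuser token."""
    if api_key:
        # Authenticate the key the user pasted into the component (passed as a query-style param).
        return await get_current_user(token=None, query_param=api_key, header_param=None, db=db)
    # Previous behaviour: mint/reuse a long-term token and load that user.
    user_id, _ = await create_user_longterm_token(db)
    return await get_user_by_id(db, user_id)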
@@ -139,8 +139,8 @@ class KBIngestionComponent(Component):
                 {
                     "column_name": "text",
                     "vectorize": True,
-                    "identifier": False,
-                }
+                    "identifier": True,
+                },
             ],
         ),
         IntInput(
@@ -187,9 +187,8 @@ class KBIngestionComponent(Component):
         df_columns = set(df_source.columns)
         for config in config_list:
             col_name = config.get("column_name")
-            if col_name not in df_columns and not self.silent_errors:
+            if col_name not in df_columns:
                 msg = f"Column '{col_name}' not found in DataFrame. Available columns: {sorted(df_columns)}"
-                self.log(f"Warning: {msg}")
                 raise ValueError(msg)
 
         return config_list
@@ -295,9 +294,7 @@ class KBIngestionComponent(Component):
             if not cfg_path.exists():
                 cfg_path.write_text(json.dumps(config_list, indent=2))
 
-        except Exception as e:
-            if not self.silent_errors:
-                raise
+        except (OSError, TypeError, ValueError) as e:
             self.log(f"Error saving KB files: {e}")
 
     def _build_column_metadata(self, config_list: list[dict[str, Any]], df_source: pd.DataFrame) -> dict[str, Any]:
@@ -367,9 +364,7 @@ class KBIngestionComponent(Component):
             chroma.add_documents(documents)
             self.log(f"Added {len(documents)} documents to vector store '{self.knowledge_base}'")
 
-        except Exception as e:
-            if not self.silent_errors:
-                raise
+        except (OSError, ValueError, RuntimeError) as e:
             self.log(f"Error creating vector store: {e}")
 
     def _convert_df_to_data_objects(self, df_source: pd.DataFrame, config_list: list[dict[str, Any]]) -> list[Data]:
@@ -407,16 +402,22 @@ class KBIngestionComponent(Component):
 
         # Convert each row to a Data object
         for _, row in df_source.iterrows():
-            # Build content text from vectorized columns using list comprehension
-            content_parts = [str(row[col]) for col in content_cols if col in row and pd.notna(row[col])]
+            # Build content text from identifier columns using list comprehension
+            identifier_parts = [str(row[col]) for col in content_cols if col in row and pd.notna(row[col])]
 
-            page_content = " ".join(content_parts)
+            # Join all parts into a single string
+            page_content = " ".join(identifier_parts)
 
             # Build metadata from NON-vectorized columns only (simple key-value pairs)
             data_dict = {
                 "text": page_content,  # Main content for vectorization
             }
 
+            # Add identifier columns if they exist
+            if identifier_cols:
+                identifier_parts = [str(row[col]) for col in identifier_cols if col in row and pd.notna(row[col])]
+                page_content = " ".join(identifier_parts)
+
             # Add metadata columns as simple key-value pairs
             for col in df_source.columns:
                 if col not in content_cols and col in row and pd.notna(row[col]):
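
The net effect of the row-conversion change above is that the text is first assembled from the content (vectorize) columns and then, when identifier columns are configured, rebuilt from those columns instead. A rough standalone sketch of that flow; the toy DataFrame and the `content_cols`/`identifier_cols` lists are made up for illustration (the component derives them from its column configuration):

import pandas as pd

# Illustrative frame and column configuration, not data from the component.
df = pd.DataFrame({"title": ["Intro", "Guide"], "body": ["hello world", None], "author": ["ada", "bob"]})
content_cols = ["title", "body"]   # columns marked vectorize=True
identifier_cols = ["author"]       # columns marked identifier=True

for _, row in df.iterrows():
    # Same comprehension pattern the component uses: skip missing columns and NaN cells.
    parts = [str(row[col]) for col in content_cols if col in row and pd.notna(row[col])]
    page_content = " ".join(parts)

    # When identifier columns are configured, the text is rebuilt from them instead.
    if identifier_cols:
        parts = [str(row[col]) for col in identifier_cols if col in row and pd.notna(row[col])]
        page_content = " ".join(parts)

    print(page_content)  # -> "ada", then "bob"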
@@ -526,9 +527,7 @@ class KBIngestionComponent(Component):
 
             return Data(data=meta)
 
-        except Exception as e:
-            if not self.silent_errors:
-                raise
+        except (OSError, ValueError, RuntimeError, KeyError) as e:
             self.log(f"Error in KB ingestion: {e}")
             self.status = f"❌ KB ingestion failed: {e}"
             return Data(data={"error": str(e), "kb_name": self.knowledge_base})
@@ -1,7 +1,13 @@
 from __future__ import annotations
 
+import signal
+import sys
+import traceback
+from contextlib import suppress
 from typing import TYPE_CHECKING, Any
 
+from loguru import logger
+
 from langflow.components._importing import import_mod
 
 if TYPE_CHECKING:
@@ -41,3 +47,195 @@ def __getattr__(attr_name: str) -> Any:
 
 def __dir__() -> list[str]:
     return list(__all__)
+
+
+def docling_worker(file_paths: list[str], queue, pipeline: str, ocr_engine: str):
+    """Worker function for processing files with Docling in a separate process."""
+    # Signal handling for graceful shutdown
+    shutdown_requested = False
+
+    def signal_handler(signum: int, frame) -> None:  # noqa: ARG001
+        """Handle shutdown signals gracefully."""
+        nonlocal shutdown_requested
+        signal_names: dict[int, str] = {signal.SIGTERM: "SIGTERM", signal.SIGINT: "SIGINT"}
+        signal_name = signal_names.get(signum, f"signal {signum}")
+
+        logger.debug(f"Docling worker received {signal_name}, initiating graceful shutdown...")
+        shutdown_requested = True
+
+        # Send shutdown notification to parent process
+        with suppress(Exception):
+            queue.put({"error": f"Worker interrupted by {signal_name}", "shutdown": True})
+
+        # Exit gracefully
+        sys.exit(0)
+
+    def check_shutdown() -> None:
+        """Check if shutdown was requested and exit if so."""
+        if shutdown_requested:
+            logger.info("Shutdown requested, exiting worker...")
+
+            with suppress(Exception):
+                queue.put({"error": "Worker shutdown requested", "shutdown": True})
+
+            sys.exit(0)
+
+    # Register signal handlers early
+    try:
+        signal.signal(signal.SIGTERM, signal_handler)
+        signal.signal(signal.SIGINT, signal_handler)
+        logger.debug("Signal handlers registered for graceful shutdown")
+    except (OSError, ValueError) as e:
+        # Some signals might not be available on all platforms
+        logger.warning(f"Warning: Could not register signal handlers: {e}")
+
+    # Check for shutdown before heavy imports
+    check_shutdown()
+
+    try:
+        from docling.datamodel.base_models import ConversionStatus, InputFormat
+        from docling.datamodel.pipeline_options import (
+            OcrOptions,
+            PdfPipelineOptions,
+            VlmPipelineOptions,
+        )
+        from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
+        from docling.models.factories import get_ocr_factory
+        from docling.pipeline.vlm_pipeline import VlmPipeline
+
+        # Check for shutdown after imports
+        check_shutdown()
+        logger.debug("Docling dependencies loaded successfully")
+
+    except ModuleNotFoundError:
+        msg = (
+            "Docling is an optional dependency of Langflow. "
+            "Install with `uv pip install 'langflow[docling]'` "
+            "or refer to the documentation"
+        )
+        queue.put({"error": msg})
+        return
+    except ImportError as e:
+        # A different import failed (e.g., a transitive dependency); preserve details.
+        queue.put({"error": f"Failed to import a Docling dependency: {e}"})
+        return
+    except KeyboardInterrupt:
+        logger.warning("KeyboardInterrupt during imports, exiting...")
+        queue.put({"error": "Worker interrupted during imports", "shutdown": True})
+        return
+
+    # Configure the standard PDF pipeline
+    def _get_standard_opts() -> PdfPipelineOptions:
+        check_shutdown()  # Check before heavy operations
+
+        pipeline_options = PdfPipelineOptions()
+        pipeline_options.do_ocr = ocr_engine != ""
+        if pipeline_options.do_ocr:
+            ocr_factory = get_ocr_factory(
+                allow_external_plugins=False,
+            )
+
+            ocr_options: OcrOptions = ocr_factory.create_options(
+                kind=ocr_engine,
+            )
+            pipeline_options.ocr_options = ocr_options
+        return pipeline_options
+
+    # Configure the VLM pipeline
+    def _get_vlm_opts() -> VlmPipelineOptions:
+        check_shutdown()  # Check before heavy operations
+        return VlmPipelineOptions()
+
+    # Configure the main format options and create the DocumentConverter()
+    def _get_converter() -> DocumentConverter:
+        check_shutdown()  # Check before heavy operations
+
+        if pipeline == "standard":
+            pdf_format_option = PdfFormatOption(
+                pipeline_options=_get_standard_opts(),
+            )
+        elif pipeline == "vlm":
+            pdf_format_option = PdfFormatOption(pipeline_cls=VlmPipeline, pipeline_options=_get_vlm_opts())
+        else:
+            msg = f"Unknown pipeline: {pipeline!r}"
+            raise ValueError(msg)
+
+        format_options: dict[InputFormat, FormatOption] = {
+            InputFormat.PDF: pdf_format_option,
+            InputFormat.IMAGE: pdf_format_option,
+        }
+
+        return DocumentConverter(format_options=format_options)
+
+    try:
+        # Check for shutdown before creating converter (can be slow)
+        check_shutdown()
+        logger.info(f"Initializing {pipeline} pipeline with OCR: {ocr_engine or 'disabled'}")
+
+        converter = _get_converter()
+
+        # Check for shutdown before processing files
+        check_shutdown()
+        logger.info(f"Starting to process {len(file_paths)} files...")
+
+        # Process files with periodic shutdown checks
+        results = []
+        for i, file_path in enumerate(file_paths):
+            # Check for shutdown before processing each file
+            check_shutdown()
+
+            logger.debug(f"Processing file {i + 1}/{len(file_paths)}: {file_path}")
+
+            try:
+                # Process single file (we can't easily interrupt convert_all)
+                single_result = converter.convert_all([file_path])
+                results.extend(single_result)
+
+                # Check for shutdown after each file
+                check_shutdown()
+
+            except (OSError, ValueError, RuntimeError, ImportError) as file_error:
+                # Handle specific file processing errors
+                logger.error(f"Error processing file {file_path}: {file_error}")
+                # Continue with other files, but check for shutdown
+                check_shutdown()
+            except Exception as file_error:  # noqa: BLE001
+                # Catch any other unexpected errors to prevent worker crash
+                logger.error(f"Unexpected error processing file {file_path}: {file_error}")
+                # Continue with other files, but check for shutdown
+                check_shutdown()
+
+        # Final shutdown check before sending results
+        check_shutdown()
+
+        # Process the results while maintaining the original structure
+        processed_data = [
+            {"document": res.document, "file_path": str(res.input.file), "status": res.status.name}
+            if res.status == ConversionStatus.SUCCESS
+            else None
+            for res in results
+        ]
+
+        logger.info(f"Successfully processed {len([d for d in processed_data if d])} files")
+        queue.put(processed_data)
+
+    except KeyboardInterrupt:
+        logger.warning("KeyboardInterrupt during processing, exiting gracefully...")
+        queue.put({"error": "Worker interrupted during processing", "shutdown": True})
+        return
+    except Exception as e:  # noqa: BLE001
+        if shutdown_requested:
+            logger.exception("Exception occurred during shutdown, exiting...")
+            return
+
+        # Send any processing error to the main process with traceback
+        error_info = {"error": str(e), "traceback": traceback.format_exc()}
+        logger.error(f"Error in worker: {error_info}")
+        queue.put(error_info)
+    finally:
+        logger.info("Docling worker finishing...")
+        # Ensure we don't leave any hanging processes
+        if shutdown_requested:
+            logger.debug("Worker shutdown completed")
+        else:
+            logger.debug("Worker completed normally")
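
The worker above is designed to run in a separate process and report back through a queue. A minimal sketch of how a parent process could drive it with the standard library follows; "example.pdf" is a placeholder path, the import of `docling_worker` is assumed to come from the module above, and the actual component in this package manages the worker process through its own machinery rather than this snippet.

import multiprocessing as mp

# Assumes docling_worker (defined in the module above) is importable in this scope.
if __name__ == "__main__":
    queue: mp.Queue = mp.Queue()
    proc = mp.Process(
        target=docling_worker,
        args=(["example.pdf"], queue, "standard", ""),  # placeholder file; empty ocr_engine disables OCR
    )
    proc.start()
    result = queue.get()  # read before join() so a large payload cannot block the child
    proc.join()

    if isinstance(result, dict) and "error" in result:
        print("worker failed:", result["error"])
    else:
        # On success the worker sends a list with one dict (or None on failure) per input file.
        for item in result:
            if item is not None:
                print(item["file_path"], item["status"])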