lfx-nightly 0.2.0.dev26__py3-none-any.whl → 0.2.1.dev7__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (85)
  1. lfx/_assets/component_index.json +1 -1
  2. lfx/base/agents/agent.py +9 -4
  3. lfx/base/agents/altk_base_agent.py +16 -3
  4. lfx/base/agents/altk_tool_wrappers.py +1 -1
  5. lfx/base/agents/utils.py +4 -0
  6. lfx/base/composio/composio_base.py +78 -41
  7. lfx/base/data/base_file.py +14 -4
  8. lfx/base/data/cloud_storage_utils.py +156 -0
  9. lfx/base/data/docling_utils.py +191 -65
  10. lfx/base/data/storage_utils.py +109 -0
  11. lfx/base/datastax/astradb_base.py +75 -64
  12. lfx/base/mcp/util.py +2 -2
  13. lfx/base/models/__init__.py +11 -1
  14. lfx/base/models/anthropic_constants.py +21 -12
  15. lfx/base/models/google_generative_ai_constants.py +33 -9
  16. lfx/base/models/model_metadata.py +6 -0
  17. lfx/base/models/ollama_constants.py +196 -30
  18. lfx/base/models/openai_constants.py +37 -10
  19. lfx/base/models/unified_models.py +1123 -0
  20. lfx/base/models/watsonx_constants.py +36 -0
  21. lfx/base/tools/component_tool.py +2 -9
  22. lfx/cli/commands.py +6 -1
  23. lfx/cli/run.py +65 -409
  24. lfx/cli/script_loader.py +13 -3
  25. lfx/components/__init__.py +0 -3
  26. lfx/components/composio/github_composio.py +1 -1
  27. lfx/components/cuga/cuga_agent.py +39 -27
  28. lfx/components/data_source/api_request.py +4 -2
  29. lfx/components/docling/__init__.py +45 -11
  30. lfx/components/docling/chunk_docling_document.py +3 -1
  31. lfx/components/docling/docling_inline.py +39 -49
  32. lfx/components/docling/export_docling_document.py +3 -1
  33. lfx/components/elastic/opensearch_multimodal.py +215 -57
  34. lfx/components/files_and_knowledge/file.py +439 -39
  35. lfx/components/files_and_knowledge/ingestion.py +8 -0
  36. lfx/components/files_and_knowledge/retrieval.py +10 -0
  37. lfx/components/files_and_knowledge/save_file.py +123 -53
  38. lfx/components/ibm/watsonx.py +7 -1
  39. lfx/components/input_output/chat_output.py +7 -1
  40. lfx/components/langchain_utilities/tool_calling.py +14 -6
  41. lfx/components/llm_operations/batch_run.py +80 -25
  42. lfx/components/llm_operations/lambda_filter.py +33 -6
  43. lfx/components/llm_operations/llm_conditional_router.py +39 -7
  44. lfx/components/llm_operations/structured_output.py +38 -12
  45. lfx/components/models/__init__.py +16 -74
  46. lfx/components/models_and_agents/agent.py +51 -201
  47. lfx/components/models_and_agents/embedding_model.py +185 -339
  48. lfx/components/models_and_agents/language_model.py +54 -318
  49. lfx/components/models_and_agents/mcp_component.py +58 -9
  50. lfx/components/ollama/ollama.py +9 -4
  51. lfx/components/ollama/ollama_embeddings.py +2 -1
  52. lfx/components/openai/openai_chat_model.py +1 -1
  53. lfx/components/processing/__init__.py +0 -3
  54. lfx/components/vllm/__init__.py +37 -0
  55. lfx/components/vllm/vllm.py +141 -0
  56. lfx/components/vllm/vllm_embeddings.py +110 -0
  57. lfx/custom/custom_component/custom_component.py +8 -6
  58. lfx/custom/directory_reader/directory_reader.py +5 -2
  59. lfx/graph/utils.py +64 -18
  60. lfx/inputs/__init__.py +2 -0
  61. lfx/inputs/input_mixin.py +54 -0
  62. lfx/inputs/inputs.py +115 -0
  63. lfx/interface/initialize/loading.py +42 -12
  64. lfx/io/__init__.py +2 -0
  65. lfx/run/__init__.py +5 -0
  66. lfx/run/base.py +494 -0
  67. lfx/schema/data.py +1 -1
  68. lfx/schema/image.py +28 -19
  69. lfx/schema/message.py +19 -3
  70. lfx/services/interfaces.py +5 -0
  71. lfx/services/manager.py +5 -4
  72. lfx/services/mcp_composer/service.py +45 -13
  73. lfx/services/settings/auth.py +18 -11
  74. lfx/services/settings/base.py +12 -24
  75. lfx/services/settings/constants.py +2 -0
  76. lfx/services/storage/local.py +37 -0
  77. lfx/services/storage/service.py +19 -0
  78. lfx/utils/constants.py +1 -0
  79. lfx/utils/image.py +29 -11
  80. lfx/utils/validate_cloud.py +14 -3
  81. {lfx_nightly-0.2.0.dev26.dist-info → lfx_nightly-0.2.1.dev7.dist-info}/METADATA +5 -2
  82. {lfx_nightly-0.2.0.dev26.dist-info → lfx_nightly-0.2.1.dev7.dist-info}/RECORD +84 -78
  83. lfx/components/processing/dataframe_to_toolset.py +0 -259
  84. {lfx_nightly-0.2.0.dev26.dist-info → lfx_nightly-0.2.1.dev7.dist-info}/WHEEL +0 -0
  85. {lfx_nightly-0.2.0.dev26.dist-info → lfx_nightly-0.2.1.dev7.dist-info}/entry_points.txt +0 -0
@@ -3,7 +3,7 @@ from lfx.base.composio.composio_base import ComposioBaseComponent
3
3
 
4
4
  class ComposioGitHubAPIComponent(ComposioBaseComponent):
5
5
  display_name: str = "GitHub"
6
- icon = "Github"
6
+ icon = "GithubComposio"
7
7
  documentation: str = "https://docs.composio.dev"
8
8
  app_name = "github"
9
9
 
@@ -53,7 +53,7 @@ class CugaComponent(ToolCallingAgentComponent):
53
53
  """Cuga Agent Component for advanced AI task execution.
54
54
 
55
55
  The Cuga component is an advanced AI agent that can execute complex tasks using
56
- various tools and browser automation. It supports custom policies, web applications,
56
+ various tools and browser automation. It supports custom instructions, web applications,
57
57
  and API interactions.
58
58
 
59
59
  Attributes:
@@ -65,7 +65,7 @@ class CugaComponent(ToolCallingAgentComponent):
65
65
  """
66
66
 
67
67
  display_name: str = "Cuga"
68
- description: str = "Define the Cuga agent's policies, then assign it a task."
68
+ description: str = "Define the Cuga agent's instructions, then assign it a task."
69
69
  documentation: str = "https://docs.langflow.org/bundles-cuga"
70
70
  icon = "bot"
71
71
  name = "Cuga"
@@ -85,10 +85,10 @@ class CugaComponent(ToolCallingAgentComponent):
85
85
  ),
86
86
  *MODEL_PROVIDERS_DICT["OpenAI"]["inputs"],
87
87
  MultilineInput(
88
- name="policies",
89
- display_name="Policies",
88
+ name="instructions",
89
+ display_name="Instructions",
90
90
  info=(
91
- "Custom instructions or policies for the agent to adhere to during its operation.\n"
91
+ "Custom instructions for the agent to adhere to during its operation.\n"
92
92
  "Example:\n"
93
93
  "## Plan\n"
94
94
  "< planning instructions e.g. which tools and when to use>\n"
@@ -117,16 +117,16 @@ class CugaComponent(ToolCallingAgentComponent):
117
117
  BoolInput(
118
118
  name="lite_mode",
119
119
  display_name="Enable CugaLite",
120
- info="Enable CugaLite for simple API tasks (faster execution).",
120
+ info="Faster reasoning for simple tasks. Enable CugaLite for simple API tasks.",
121
121
  value=True,
122
- advanced=False,
122
+ advanced=True,
123
123
  ),
124
124
  IntInput(
125
125
  name="lite_mode_tool_threshold",
126
126
  display_name="CugaLite Tool Threshold",
127
127
  info="Route to CugaLite if app has fewer than this many tools.",
128
128
  value=25,
129
- advanced=False,
129
+ advanced=True,
130
130
  ),
131
131
  DropdownInput(
132
132
  name="decomposition_strategy",
@@ -142,17 +142,17 @@ class CugaComponent(ToolCallingAgentComponent):
142
142
  display_name="Enable Browser",
143
143
  info="Toggle to enable a built-in browser tool for web scraping and searching.",
144
144
  value=False,
145
- advanced=False,
145
+ advanced=True,
146
146
  ),
147
147
  MultilineInput(
148
148
  name="web_apps",
149
149
  display_name="Web applications",
150
150
  info=(
151
- "Define a list of web applications that cuga will open when enable browser is true. "
151
+ "Cuga will automatically start this web application when Enable Browser is true. "
152
152
  "Currently only supports one web application. Example: https://example.com"
153
153
  ),
154
154
  value="",
155
- advanced=False,
155
+ advanced=True,
156
156
  ),
157
157
  ]
158
158
  outputs = [
@@ -211,7 +211,6 @@ class CugaComponent(ToolCallingAgentComponent):
211
211
  settings.advanced_features.mode = "api"
212
212
 
213
213
  from cuga.backend.activity_tracker.tracker import ActivityTracker
214
- from cuga.backend.cuga_graph.nodes.api.variables_manager.manager import VariablesManager
215
214
  from cuga.backend.cuga_graph.utils.agent_loop import StreamEvent
216
215
  from cuga.backend.cuga_graph.utils.controller import (
217
216
  AgentRunner as CugaAgent,
@@ -222,13 +221,10 @@ class CugaComponent(ToolCallingAgentComponent):
222
221
  from cuga.backend.llm.models import LLMManager
223
222
  from cuga.configurations.instructions_manager import InstructionsManager
224
223
 
225
- var_manager = VariablesManager()
226
-
227
224
  # Reset var_manager if this is the first message in history
228
225
  logger.debug(f"[CUGA] Checking history_messages: count={len(history_messages) if history_messages else 0}")
229
226
  if not history_messages or len(history_messages) == 0:
230
227
  logger.debug("[CUGA] First message in history detected, resetting var_manager")
231
- var_manager.reset()
232
228
  else:
233
229
  logger.debug(f"[CUGA] Continuing conversation with {len(history_messages)} previous messages")
234
230
 
@@ -236,12 +232,14 @@ class CugaComponent(ToolCallingAgentComponent):
236
232
  llm_manager.set_llm(llm)
237
233
  instructions_manager = InstructionsManager()
238
234
 
239
- policies_to_use = self.policies or ""
240
- logger.debug(f"[CUGA] policies are: {policies_to_use}")
241
- instructions_manager.set_instructions_from_one_file(policies_to_use)
235
+ instructions_to_use = self.instructions or ""
236
+ logger.debug(f"[CUGA] instructions are: {instructions_to_use}")
237
+ instructions_manager.set_instructions_from_one_file(instructions_to_use)
242
238
  tracker = ActivityTracker()
243
239
  tracker.set_tools(tools)
244
- cuga_agent = CugaAgent(browser_enabled=self.browser_enabled)
240
+ thread_id = self.graph.session_id
241
+ logger.debug(f"[CUGA] Using thread_id (session_id): {thread_id}")
242
+ cuga_agent = CugaAgent(browser_enabled=self.browser_enabled, thread_id=thread_id)
245
243
  if self.browser_enabled:
246
244
  await cuga_agent.initialize_freemode_env(start_url=self.web_apps.strip(), interface_mode="browser_only")
247
245
  else:
@@ -257,13 +255,20 @@ class CugaComponent(ToolCallingAgentComponent):
257
255
  logger.debug(f"[CUGA] Processing input: {current_input}")
258
256
  try:
259
257
  # Convert history to LangChain format for the event
258
+ logger.debug(f"[CUGA] Converting {len(history_messages)} history messages to LangChain format")
260
259
  lc_messages = []
261
- for msg in history_messages:
260
+ for i, msg in enumerate(history_messages):
261
+ msg_text = getattr(msg, "text", "N/A")[:50] if hasattr(msg, "text") else "N/A"
262
+ logger.debug(
263
+ f"[CUGA] Message {i}: type={type(msg)}, sender={getattr(msg, 'sender', 'N/A')}, "
264
+ f"text={msg_text}..."
265
+ )
262
266
  if hasattr(msg, "sender") and msg.sender == "Human":
263
267
  lc_messages.append(HumanMessage(content=msg.text))
264
268
  else:
265
269
  lc_messages.append(AIMessage(content=msg.text))
266
270
 
271
+ logger.debug(f"[CUGA] Converted to {len(lc_messages)} LangChain messages")
267
272
  await asyncio.sleep(0.5)
268
273
 
269
274
  # 2. Build final response
@@ -274,7 +279,9 @@ class CugaComponent(ToolCallingAgentComponent):
274
279
  last_event: StreamEvent | None = None
275
280
  tool_run_id: str | None = None
276
281
  # 3. Chain end event with AgentFinish
277
- async for event in cuga_agent.run_task_generic_yield(eval_mode=False, goal=current_input):
282
+ async for event in cuga_agent.run_task_generic_yield(
283
+ eval_mode=False, goal=current_input, chat_messages=lc_messages
284
+ ):
278
285
  logger.debug(f"[CUGA] recieved event {event}")
279
286
  if last_event is not None and tool_run_id is not None:
280
287
  logger.debug(f"[CUGA] last event {last_event}")
@@ -350,12 +357,12 @@ class CugaComponent(ToolCallingAgentComponent):
350
357
  raise ValueError(msg)
351
358
 
352
359
  try:
353
- llm_model, self.chat_history, self.tools = await self.get_agent_requirements()
354
-
355
- # Create agent message for event processing
356
360
  from lfx.schema.content_block import ContentBlock
357
361
  from lfx.schema.message import MESSAGE_SENDER_AI
358
362
 
363
+ llm_model, self.chat_history, self.tools = await self.get_agent_requirements()
364
+
365
+ # Create agent message for event processing
359
366
  agent_message = Message(
360
367
  sender=MESSAGE_SENDER_AI,
361
368
  sender_name="Cuga",
@@ -368,7 +375,7 @@ class CugaComponent(ToolCallingAgentComponent):
368
375
  # This ensures streaming works even when not connected to ChatOutput
369
376
  if not self.is_connected_to_chat_output():
370
377
  # When not connected to ChatOutput, assign ID upfront for streaming support
371
- agent_message.data["id"] = str(uuid.uuid4())
378
+ agent_message.data["id"] = uuid.uuid4()
372
379
 
373
380
  # Get input text
374
381
  input_text = self.input_value.text if hasattr(self.input_value, "text") else str(self.input_value)
@@ -476,9 +483,14 @@ class CugaComponent(ToolCallingAgentComponent):
476
483
  """
477
484
  logger.debug("[CUGA] Retrieving chat history messages.")
478
485
  logger.debug(f"[CUGA] Session ID: {self.graph.session_id}")
486
+ logger.debug(f"[CUGA] n_messages: {self.n_messages}")
487
+ logger.debug(f"[CUGA] input_value: {self.input_value}")
488
+ logger.debug(f"[CUGA] input_value type: {type(self.input_value)}")
489
+ logger.debug(f"[CUGA] input_value id: {getattr(self.input_value, 'id', None)}")
490
+
479
491
  messages = (
480
492
  await MemoryComponent(**self.get_base_args())
481
- .set(session_id=self.graph.session_id, order="Ascending", n_messages=self.n_messages)
493
+ .set(session_id=str(self.graph.session_id), order="Ascending", n_messages=self.n_messages)
482
494
  .retrieve_messages()
483
495
  )
484
496
  logger.debug(f"[CUGA] Retrieved {len(messages)} messages from memory")
@@ -678,7 +690,7 @@ class CugaComponent(ToolCallingAgentComponent):
678
690
  "tools",
679
691
  "input_value",
680
692
  "add_current_date_tool",
681
- "policies",
693
+ "instructions",
682
694
  "agent_description",
683
695
  "max_iterations",
684
696
  "handle_parsing_errors",
@@ -493,11 +493,13 @@ class APIRequestComponent(Component):
493
493
  return self.parse_curl(self.curl_input, build_config)
494
494
  return build_config
495
495
 
496
- # print(f"Current mode: {field_value}")
497
496
  if field_value == "cURL":
498
497
  set_field_display(build_config, "curl_input", value=True)
499
498
  if build_config["curl_input"]["value"]:
500
- build_config = self.parse_curl(build_config["curl_input"]["value"], build_config)
499
+ try:
500
+ build_config = self.parse_curl(build_config["curl_input"]["value"], build_config)
501
+ except ValueError as e:
502
+ self.log(f"Failed to parse cURL input: {e}")
501
503
  else:
502
504
  set_field_display(build_config, "curl_input", value=False)
503
505
 
@@ -3,35 +3,69 @@ from __future__ import annotations
3
3
  from typing import TYPE_CHECKING, Any
4
4
 
5
5
  from lfx.components._importing import import_mod
6
+ from lfx.utils.validate_cloud import is_astra_cloud_environment
6
7
 
7
8
  if TYPE_CHECKING:
8
- from .chunk_docling_document import ChunkDoclingDocumentComponent
9
- from .docling_inline import DoclingInlineComponent
10
- from .docling_remote import DoclingRemoteComponent
11
- from .export_docling_document import ExportDoclingDocumentComponent
9
+ from .chunk_docling_document import ChunkDoclingDocumentComponent # noqa: F401
10
+ from .docling_inline import DoclingInlineComponent # noqa: F401
11
+ from .docling_remote import DoclingRemoteComponent # noqa: F401
12
+ from .export_docling_document import ExportDoclingDocumentComponent # noqa: F401
12
13
 
13
- _dynamic_imports = {
14
+ _all_components = [
15
+ "ChunkDoclingDocumentComponent",
16
+ "DoclingInlineComponent",
17
+ "DoclingRemoteComponent",
18
+ "ExportDoclingDocumentComponent",
19
+ ]
20
+
21
+ _all_dynamic_imports = {
14
22
  "ChunkDoclingDocumentComponent": "chunk_docling_document",
15
23
  "DoclingInlineComponent": "docling_inline",
16
24
  "DoclingRemoteComponent": "docling_remote",
17
25
  "ExportDoclingDocumentComponent": "export_docling_document",
18
26
  }
19
27
 
20
- __all__ = [
28
+ # Components that require local Docling/EasyOCR dependencies (disabled in cloud)
29
+ _cloud_disabled_components = {
21
30
  "ChunkDoclingDocumentComponent",
22
31
  "DoclingInlineComponent",
23
- "DoclingRemoteComponent",
24
32
  "ExportDoclingDocumentComponent",
25
- ]
33
+ }
34
+
35
+
36
+ def _get_available_components() -> list[str]:
37
+ """Get list of available components, filtering out cloud-disabled ones."""
38
+ if is_astra_cloud_environment():
39
+ # Only show DoclingRemoteComponent (Docling Serve) in cloud
40
+ return [comp for comp in _all_components if comp not in _cloud_disabled_components]
41
+ return _all_components
42
+
43
+
44
+ def _get_dynamic_imports() -> dict[str, str]:
45
+ """Get dynamic imports dict, filtering out cloud-disabled ones."""
46
+ if is_astra_cloud_environment():
47
+ # Only allow DoclingRemoteComponent (Docling Serve) in cloud
48
+ return {k: v for k, v in _all_dynamic_imports.items() if k not in _cloud_disabled_components}
49
+ return _all_dynamic_imports
50
+
51
+
52
+ # Dynamically set __all__ and _dynamic_imports based on cloud environment
53
+ __all__: list[str] = _get_available_components() # noqa: PLE0605
54
+ _dynamic_imports: dict[str, str] = _get_dynamic_imports()
26
55
 
27
56
 
28
57
  def __getattr__(attr_name: str) -> Any:
29
58
  """Lazily import docling components on attribute access."""
30
- if attr_name not in _dynamic_imports:
59
+ # Check if component is available (not disabled in cloud)
60
+ if is_astra_cloud_environment() and attr_name in _cloud_disabled_components:
61
+ msg = f"module '{__name__}' has no attribute '{attr_name}'"
62
+ raise AttributeError(msg)
63
+
64
+ if attr_name not in _all_dynamic_imports:
31
65
  msg = f"module '{__name__}' has no attribute '{attr_name}'"
32
66
  raise AttributeError(msg)
33
67
  try:
34
- result = import_mod(attr_name, _dynamic_imports[attr_name], __spec__.parent)
68
+ result = import_mod(attr_name, _all_dynamic_imports[attr_name], __spec__.parent)
35
69
  except (ModuleNotFoundError, ImportError, AttributeError) as e:
36
70
  msg = f"Could not import '{attr_name}' from '{__name__}': {e}"
37
71
  raise AttributeError(msg) from e
@@ -40,4 +74,4 @@ def __getattr__(attr_name: str) -> Any:
40
74
 
41
75
 
42
76
  def __dir__() -> list[str]:
43
- return list(__all__)
77
+ return _get_available_components()
@@ -115,7 +115,9 @@ class ChunkDoclingDocumentComponent(Component):
115
115
  return [Data(text=doc.page_content, data=doc.metadata) for doc in docs]
116
116
 
117
117
  def chunk_documents(self) -> DataFrame:
118
- documents = extract_docling_documents(self.data_inputs, self.doc_key)
118
+ documents, warning = extract_docling_documents(self.data_inputs, self.doc_key)
119
+ if warning:
120
+ self.status = warning
119
121
 
120
122
  chunker: BaseChunker
121
123
  if self.chunker == "HybridChunker":
@@ -1,6 +1,6 @@
1
+ import queue
2
+ import threading
1
3
  import time
2
- from multiprocessing import Queue, get_context
3
- from queue import Empty
4
4
 
5
5
  from lfx.base.data import BaseFileComponent
6
6
  from lfx.base.data.docling_utils import _serialize_pydantic_model, docling_worker
@@ -92,60 +92,57 @@ class DoclingInlineComponent(BaseFileComponent):
92
92
  *BaseFileComponent.get_base_outputs(),
93
93
  ]
94
94
 
95
- def _wait_for_result_with_process_monitoring(self, queue: Queue, proc, timeout: int = 300):
96
- """Wait for result from queue while monitoring process health.
95
+ def _wait_for_result_with_thread_monitoring(
96
+ self, result_queue: queue.Queue, thread: threading.Thread, timeout: int = 300
97
+ ):
98
+ """Wait for result from queue while monitoring thread health.
97
99
 
98
- Handles cases where process crashes without sending result.
100
+ Handles cases where thread crashes without sending result.
99
101
  """
100
102
  start_time = time.time()
101
103
 
102
104
  while time.time() - start_time < timeout:
103
- # Check if process is still alive
104
- if not proc.is_alive():
105
- # Process died, try to get any result it might have sent
105
+ # Check if thread is still alive
106
+ if not thread.is_alive():
107
+ # Thread finished, try to get any result it might have sent
106
108
  try:
107
- result = queue.get_nowait()
108
- except Empty:
109
- # Process died without sending result
110
- msg = f"Worker process crashed unexpectedly without producing result. Exit code: {proc.exitcode}"
109
+ result = result_queue.get_nowait()
110
+ except queue.Empty:
111
+ # Thread finished without sending result
112
+ msg = "Worker thread crashed unexpectedly without producing result."
111
113
  raise RuntimeError(msg) from None
112
114
  else:
113
- self.log("Process completed and result retrieved")
115
+ self.log("Thread completed and result retrieved")
114
116
  return result
115
117
 
116
118
  # Poll the queue instead of blocking
117
119
  try:
118
- result = queue.get(timeout=1)
119
- except Empty:
120
+ result = result_queue.get(timeout=1)
121
+ except queue.Empty:
120
122
  # No result yet, continue monitoring
121
123
  continue
122
124
  else:
123
- self.log("Result received from worker process")
125
+ self.log("Result received from worker thread")
124
126
  return result
125
127
 
126
128
  # Overall timeout reached
127
- msg = f"Process timed out after {timeout} seconds"
129
+ msg = f"Thread timed out after {timeout} seconds"
128
130
  raise TimeoutError(msg)
129
131
 
130
- def _terminate_process_gracefully(self, proc, timeout_terminate: int = 10, timeout_kill: int = 5):
131
- """Terminate process gracefully with escalating signals.
132
+ def _stop_thread_gracefully(self, thread: threading.Thread, timeout: int = 10):
133
+ """Wait for thread to complete gracefully.
132
134
 
133
- First tries SIGTERM, then SIGKILL if needed.
135
+ Note: Python threads cannot be forcefully killed, so we just wait.
136
+ The thread should respond to shutdown signals via the queue.
134
137
  """
135
- if not proc.is_alive():
138
+ if not thread.is_alive():
136
139
  return
137
140
 
138
- self.log("Attempting graceful process termination with SIGTERM")
139
- proc.terminate() # Send SIGTERM
140
- proc.join(timeout=timeout_terminate)
141
+ self.log("Waiting for thread to complete gracefully")
142
+ thread.join(timeout=timeout)
141
143
 
142
- if proc.is_alive():
143
- self.log("Process didn't respond to SIGTERM, using SIGKILL")
144
- proc.kill() # Send SIGKILL
145
- proc.join(timeout=timeout_kill)
146
-
147
- if proc.is_alive():
148
- self.log("Warning: Process still alive after SIGKILL")
144
+ if thread.is_alive():
145
+ self.log("Warning: Thread still alive after timeout")
149
146
 
150
147
  def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:
151
148
  try:
@@ -167,44 +164,37 @@ class DoclingInlineComponent(BaseFileComponent):
167
164
  if self.pic_desc_llm is not None:
168
165
  pic_desc_config = _serialize_pydantic_model(self.pic_desc_llm)
169
166
 
170
- ctx = get_context("spawn")
171
- queue: Queue = ctx.Queue()
172
- proc = ctx.Process(
167
+ # Use threading instead of multiprocessing for memory sharing
168
+ # This enables the global DocumentConverter cache to work across runs
169
+ result_queue: queue.Queue = queue.Queue()
170
+ thread = threading.Thread(
173
171
  target=docling_worker,
174
172
  kwargs={
175
173
  "file_paths": file_paths,
176
- "queue": queue,
174
+ "queue": result_queue,
177
175
  "pipeline": self.pipeline,
178
176
  "ocr_engine": self.ocr_engine,
179
177
  "do_picture_classification": self.do_picture_classification,
180
178
  "pic_desc_config": pic_desc_config,
181
179
  "pic_desc_prompt": self.pic_desc_prompt,
182
180
  },
181
+ daemon=False, # Allow thread to complete even if main thread exits
183
182
  )
184
183
 
185
184
  result = None
186
- proc.start()
185
+ thread.start()
187
186
 
188
187
  try:
189
- result = self._wait_for_result_with_process_monitoring(queue, proc, timeout=300)
188
+ result = self._wait_for_result_with_thread_monitoring(result_queue, thread, timeout=300)
190
189
  except KeyboardInterrupt:
191
- self.log("Docling process cancelled by user")
190
+ self.log("Docling thread cancelled by user")
192
191
  result = []
193
192
  except Exception as e:
194
193
  self.log(f"Error during processing: {e}")
195
194
  raise
196
195
  finally:
197
- # Improved cleanup with graceful termination
198
- try:
199
- self._terminate_process_gracefully(proc)
200
- finally:
201
- # Always close and cleanup queue resources
202
- try:
203
- queue.close()
204
- queue.join_thread()
205
- except Exception as e: # noqa: BLE001
206
- # Ignore cleanup errors, but log them
207
- self.log(f"Warning: Error during queue cleanup - {e}")
196
+ # Wait for thread to complete gracefully
197
+ self._stop_thread_gracefully(thread)
208
198
 
209
199
  # Enhanced error checking with dependency-specific handling
210
200
  if isinstance(result, dict) and "error" in result:
@@ -86,7 +86,9 @@ class ExportDoclingDocumentComponent(Component):
86
86
  return build_config
87
87
 
88
88
  def export_document(self) -> list[Data]:
89
- documents = extract_docling_documents(self.data_inputs, self.doc_key)
89
+ documents, warning = extract_docling_documents(self.data_inputs, self.doc_key)
90
+ if warning:
91
+ self.status = warning
90
92
 
91
93
  results: list[Data] = []
92
94
  try: