lfx-nightly 0.1.12.dev14__py3-none-any.whl → 0.1.12.dev15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lfx-nightly might be problematic.

Files changed (127)
  1. lfx/base/agents/events.py +40 -29
  2. lfx/base/constants.py +1 -1
  3. lfx/base/data/docling_utils.py +43 -8
  4. lfx/base/data/utils.py +3 -3
  5. lfx/base/knowledge_bases/__init__.py +3 -0
  6. lfx/base/knowledge_bases/knowledge_base_utils.py +137 -0
  7. lfx/base/models/anthropic_constants.py +3 -1
  8. lfx/base/models/model_input_constants.py +1 -1
  9. lfx/base/vectorstores/vector_store_connection_decorator.py +1 -1
  10. lfx/components/agentql/agentql_api.py +1 -1
  11. lfx/components/agents/agent.py +62 -17
  12. lfx/components/agents/mcp_component.py +11 -1
  13. lfx/components/aiml/aiml.py +4 -1
  14. lfx/components/amazon/amazon_bedrock_converse.py +196 -0
  15. lfx/components/amazon/amazon_bedrock_model.py +5 -1
  16. lfx/components/azure/azure_openai.py +1 -1
  17. lfx/components/azure/azure_openai_embeddings.py +1 -1
  18. lfx/components/clickhouse/clickhouse.py +1 -1
  19. lfx/components/confluence/confluence.py +1 -1
  20. lfx/components/crewai/crewai.py +1 -0
  21. lfx/components/crewai/hierarchical_crew.py +1 -0
  22. lfx/components/crewai/hierarchical_task.py +1 -0
  23. lfx/components/crewai/sequential_crew.py +1 -0
  24. lfx/components/crewai/sequential_task.py +1 -0
  25. lfx/components/crewai/sequential_task_agent.py +1 -0
  26. lfx/components/data/api_request.py +13 -3
  27. lfx/components/data/csv_to_data.py +1 -0
  28. lfx/components/data/file.py +71 -25
  29. lfx/components/data/json_to_data.py +1 -0
  30. lfx/components/datastax/astra_db.py +2 -1
  31. lfx/components/datastax/astra_vectorize.py +3 -5
  32. lfx/components/datastax/astradb_tool.py +5 -1
  33. lfx/components/datastax/astradb_vectorstore.py +8 -1
  34. lfx/components/deactivated/chat_litellm_model.py +1 -1
  35. lfx/components/deactivated/metal.py +1 -1
  36. lfx/components/docling/docling_inline.py +23 -9
  37. lfx/components/elastic/elasticsearch.py +1 -1
  38. lfx/components/elastic/opensearch.py +1 -1
  39. lfx/components/embeddings/similarity.py +1 -0
  40. lfx/components/embeddings/text_embedder.py +1 -0
  41. lfx/components/firecrawl/firecrawl_crawl_api.py +1 -1
  42. lfx/components/firecrawl/firecrawl_extract_api.py +1 -1
  43. lfx/components/firecrawl/firecrawl_map_api.py +1 -1
  44. lfx/components/firecrawl/firecrawl_scrape_api.py +1 -1
  45. lfx/components/google/gmail.py +1 -0
  46. lfx/components/google/google_generative_ai_embeddings.py +1 -1
  47. lfx/components/helpers/memory.py +8 -6
  48. lfx/components/helpers/output_parser.py +1 -0
  49. lfx/components/helpers/store_message.py +1 -0
  50. lfx/components/huggingface/huggingface.py +3 -1
  51. lfx/components/huggingface/huggingface_inference_api.py +1 -1
  52. lfx/components/ibm/watsonx.py +1 -1
  53. lfx/components/ibm/watsonx_embeddings.py +1 -1
  54. lfx/components/icosacomputing/combinatorial_reasoner.py +1 -1
  55. lfx/components/input_output/chat.py +0 -27
  56. lfx/components/input_output/chat_output.py +3 -27
  57. lfx/components/knowledge_bases/__init__.py +34 -0
  58. lfx/components/knowledge_bases/ingestion.py +686 -0
  59. lfx/components/knowledge_bases/retrieval.py +256 -0
  60. lfx/components/langchain_utilities/langchain_hub.py +1 -1
  61. lfx/components/langwatch/langwatch.py +1 -1
  62. lfx/components/logic/conditional_router.py +40 -3
  63. lfx/components/logic/data_conditional_router.py +1 -0
  64. lfx/components/logic/flow_tool.py +2 -1
  65. lfx/components/logic/pass_message.py +1 -0
  66. lfx/components/logic/sub_flow.py +2 -1
  67. lfx/components/milvus/milvus.py +1 -1
  68. lfx/components/olivya/olivya.py +1 -1
  69. lfx/components/processing/alter_metadata.py +1 -0
  70. lfx/components/processing/combine_text.py +1 -0
  71. lfx/components/processing/create_data.py +1 -0
  72. lfx/components/processing/data_to_dataframe.py +1 -0
  73. lfx/components/processing/extract_key.py +1 -0
  74. lfx/components/processing/filter_data.py +1 -0
  75. lfx/components/processing/filter_data_values.py +1 -0
  76. lfx/components/processing/json_cleaner.py +1 -0
  77. lfx/components/processing/merge_data.py +1 -0
  78. lfx/components/processing/message_to_data.py +1 -0
  79. lfx/components/processing/parse_data.py +1 -0
  80. lfx/components/processing/parse_dataframe.py +1 -0
  81. lfx/components/processing/parse_json_data.py +1 -0
  82. lfx/components/processing/regex.py +1 -0
  83. lfx/components/processing/select_data.py +1 -0
  84. lfx/components/processing/structured_output.py +7 -3
  85. lfx/components/processing/update_data.py +1 -0
  86. lfx/components/prototypes/__init__.py +8 -7
  87. lfx/components/qdrant/qdrant.py +1 -1
  88. lfx/components/redis/redis_chat.py +1 -1
  89. lfx/components/tools/__init__.py +0 -6
  90. lfx/components/tools/calculator.py +2 -1
  91. lfx/components/tools/python_code_structured_tool.py +1 -0
  92. lfx/components/tools/python_repl.py +2 -1
  93. lfx/components/tools/search_api.py +2 -1
  94. lfx/components/tools/serp_api.py +2 -1
  95. lfx/components/tools/tavily_search_tool.py +1 -0
  96. lfx/components/tools/wikidata_api.py +2 -1
  97. lfx/components/tools/wikipedia_api.py +2 -1
  98. lfx/components/tools/yahoo_finance.py +2 -1
  99. lfx/components/twelvelabs/video_embeddings.py +1 -1
  100. lfx/components/upstash/upstash.py +1 -1
  101. lfx/components/vectorstores/astradb_graph.py +8 -1
  102. lfx/components/vectorstores/local_db.py +1 -0
  103. lfx/components/vectorstores/weaviate.py +1 -1
  104. lfx/components/wolframalpha/wolfram_alpha_api.py +1 -1
  105. lfx/components/zep/zep.py +2 -1
  106. lfx/custom/attributes.py +1 -0
  107. lfx/graph/graph/base.py +61 -4
  108. lfx/inputs/inputs.py +1 -0
  109. lfx/log/logger.py +31 -11
  110. lfx/schema/message.py +6 -1
  111. lfx/schema/schema.py +4 -0
  112. lfx/services/__init__.py +3 -0
  113. lfx/services/mcp_composer/__init__.py +6 -0
  114. lfx/services/mcp_composer/factory.py +16 -0
  115. lfx/services/mcp_composer/service.py +599 -0
  116. lfx/services/schema.py +1 -0
  117. lfx/services/settings/auth.py +18 -15
  118. lfx/services/settings/base.py +38 -0
  119. lfx/services/settings/constants.py +4 -1
  120. lfx/services/settings/feature_flags.py +0 -1
  121. lfx/template/frontend_node/base.py +2 -0
  122. lfx/utils/image.py +1 -1
  123. {lfx_nightly-0.1.12.dev14.dist-info → lfx_nightly-0.1.12.dev15.dist-info}/METADATA +1 -1
  124. {lfx_nightly-0.1.12.dev14.dist-info → lfx_nightly-0.1.12.dev15.dist-info}/RECORD +126 -118
  125. lfx/components/datastax/astradb.py +0 -1285
  126. {lfx_nightly-0.1.12.dev14.dist-info → lfx_nightly-0.1.12.dev15.dist-info}/WHEEL +0 -0
  127. {lfx_nightly-0.1.12.dev14.dist-info → lfx_nightly-0.1.12.dev15.dist-info}/entry_points.txt +0 -0
lfx/components/amazon/amazon_bedrock_converse.py (new file):
@@ -0,0 +1,196 @@
+ from langflow.base.models.aws_constants import AWS_REGIONS, AWS_MODEL_IDs
+ from langflow.base.models.model import LCModelComponent
+ from langflow.field_typing import LanguageModel
+ from langflow.inputs.inputs import BoolInput, FloatInput, IntInput, MessageTextInput, SecretStrInput
+ from langflow.io import DictInput, DropdownInput
+
+
+ class AmazonBedrockConverseComponent(LCModelComponent):
+     display_name: str = "Amazon Bedrock Converse"
+     description: str = (
+         "Generate text using Amazon Bedrock LLMs with the modern Converse API "
+         "for improved conversation handling. We recommend the Converse API for users "
+         "who do not need to use custom models. It can be accessed using ChatBedrockConverse."
+     )
+     icon = "Amazon"
+     name = "AmazonBedrockConverseModel"
+     beta = True
+
+     inputs = [
+         *LCModelComponent._base_inputs,
+         DropdownInput(
+             name="model_id",
+             display_name="Model ID",
+             options=AWS_MODEL_IDs,
+             value="anthropic.claude-3-5-sonnet-20241022-v2:0",
+             info="List of available model IDs to choose from.",
+         ),
+         SecretStrInput(
+             name="aws_access_key_id",
+             display_name="AWS Access Key ID",
+             info="The access key for your AWS account. "
+             "Usually set in Python code as the environment variable 'AWS_ACCESS_KEY_ID'.",
+             value="AWS_ACCESS_KEY_ID",
+             required=True,
+         ),
+         SecretStrInput(
+             name="aws_secret_access_key",
+             display_name="AWS Secret Access Key",
+             info="The secret key for your AWS account. "
+             "Usually set in Python code as the environment variable 'AWS_SECRET_ACCESS_KEY'.",
+             value="AWS_SECRET_ACCESS_KEY",
+             required=True,
+         ),
+         SecretStrInput(
+             name="aws_session_token",
+             display_name="AWS Session Token",
+             advanced=True,
+             info="The session key for your AWS account. "
+             "Only needed for temporary credentials. "
+             "Usually set in Python code as the environment variable 'AWS_SESSION_TOKEN'.",
+             load_from_db=False,
+         ),
+         SecretStrInput(
+             name="credentials_profile_name",
+             display_name="Credentials Profile Name",
+             advanced=True,
+             info="The name of the profile to use from your "
+             "~/.aws/credentials file. "
+             "If not provided, the default profile will be used.",
+             load_from_db=False,
+         ),
+         DropdownInput(
+             name="region_name",
+             display_name="Region Name",
+             value="us-east-1",
+             options=AWS_REGIONS,
+             info="The AWS region where your Bedrock resources are located.",
+         ),
+         MessageTextInput(
+             name="endpoint_url",
+             display_name="Endpoint URL",
+             advanced=True,
+             info="The URL of the Bedrock endpoint to use.",
+         ),
+         # Model-specific parameters for fine control
+         FloatInput(
+             name="temperature",
+             display_name="Temperature",
+             value=0.7,
+             info="Controls randomness in output. Higher values make output more random.",
+             advanced=True,
+         ),
+         IntInput(
+             name="max_tokens",
+             display_name="Max Tokens",
+             value=4096,
+             info="Maximum number of tokens to generate.",
+             advanced=True,
+         ),
+         FloatInput(
+             name="top_p",
+             display_name="Top P",
+             value=0.9,
+             info="Nucleus sampling parameter. Controls diversity of output.",
+             advanced=True,
+         ),
+         IntInput(
+             name="top_k",
+             display_name="Top K",
+             value=250,
+             info="Limits the number of highest probability vocabulary tokens to consider. "
+             "Note: Not all models support top_k. Use 'Additional Model Fields' for manual configuration if needed.",
+             advanced=True,
+         ),
+         BoolInput(
+             name="disable_streaming",
+             display_name="Disable Streaming",
+             value=False,
+             info="If True, disables streaming responses. Useful for batch processing.",
+             advanced=True,
+         ),
+         DictInput(
+             name="additional_model_fields",
+             display_name="Additional Model Fields",
+             advanced=True,
+             is_list=True,
+             info="Additional model-specific parameters for fine-tuning behavior.",
+         ),
+     ]
+
+     def build_model(self) -> LanguageModel:  # type: ignore[type-var]
+         try:
+             from langchain_aws.chat_models.bedrock_converse import ChatBedrockConverse
+         except ImportError as e:
+             msg = "langchain_aws is not installed. Please install it with `pip install langchain_aws`."
+             raise ImportError(msg) from e
+
+         # Prepare initialization parameters
+         init_params = {
+             "model": self.model_id,
+             "region_name": self.region_name,
+         }
+
+         # Add AWS credentials if provided
+         if self.aws_access_key_id:
+             init_params["aws_access_key_id"] = self.aws_access_key_id
+         if self.aws_secret_access_key:
+             init_params["aws_secret_access_key"] = self.aws_secret_access_key
+         if self.aws_session_token:
+             init_params["aws_session_token"] = self.aws_session_token
+         if self.credentials_profile_name:
+             init_params["credentials_profile_name"] = self.credentials_profile_name
+         if self.endpoint_url:
+             init_params["endpoint_url"] = self.endpoint_url
+
+         # Add model parameters directly as supported by ChatBedrockConverse
+         if hasattr(self, "temperature") and self.temperature is not None:
+             init_params["temperature"] = self.temperature
+         if hasattr(self, "max_tokens") and self.max_tokens is not None:
+             init_params["max_tokens"] = self.max_tokens
+         if hasattr(self, "top_p") and self.top_p is not None:
+             init_params["top_p"] = self.top_p
+
+         # Handle streaming - only disable if explicitly requested
+         if hasattr(self, "disable_streaming") and self.disable_streaming:
+             init_params["disable_streaming"] = True
+
+         # Handle additional model request fields carefully
+         # Based on the error, inferenceConfig should not be passed as additional fields for some models
+         additional_model_request_fields = {}
+
+         # Only add top_k if user explicitly provided additional fields or if needed for specific models
+         if hasattr(self, "additional_model_fields") and self.additional_model_fields:
+             for field in self.additional_model_fields:
+                 if isinstance(field, dict):
+                     additional_model_request_fields.update(field)
+
+         # For now, don't automatically add inferenceConfig for top_k to avoid validation errors
+         # Users can manually add it via additional_model_fields if their model supports it
+
+         # Only add if we have actual additional fields
+         if additional_model_request_fields:
+             init_params["additional_model_request_fields"] = additional_model_request_fields
+
+         try:
+             output = ChatBedrockConverse(**init_params)
+         except Exception as e:
+             # Provide helpful error message with fallback suggestions
+             error_details = str(e)
+             if "validation error" in error_details.lower():
+                 msg = (
+                     f"ChatBedrockConverse validation error: {error_details}. "
+                     f"This may be due to incompatible parameters for model '{self.model_id}'. "
+                     f"Consider adjusting the model parameters or trying the legacy Amazon Bedrock component."
+                 )
+             elif "converse api" in error_details.lower():
+                 msg = (
+                     f"Converse API error: {error_details}. "
+                     f"The model '{self.model_id}' may not support the Converse API. "
+                     f"Try using the legacy Amazon Bedrock component instead."
+                 )
+             else:
+                 msg = f"Could not initialize ChatBedrockConverse: {error_details}"
+             raise ValueError(msg) from e
+
+         return output
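
For orientation, the new component is essentially a thin wrapper over ChatBedrockConverse from langchain-aws. A minimal sketch of using that class directly, outside Langflow (assumes langchain_aws is installed and AWS credentials are available in the environment; the model ID mirrors the component's default):

    from langchain_aws import ChatBedrockConverse

    # Roughly what build_model() assembles from the component inputs above.
    llm = ChatBedrockConverse(
        model="anthropic.claude-3-5-sonnet-20241022-v2:0",
        region_name="us-east-1",
        temperature=0.7,
        max_tokens=4096,
    )
    print(llm.invoke("Say hello in one sentence.").content)
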
lfx/components/amazon/amazon_bedrock_model.py:
@@ -7,7 +7,11 @@ from lfx.io import DictInput, DropdownInput
 
  class AmazonBedrockComponent(LCModelComponent):
      display_name: str = "Amazon Bedrock"
-     description: str = "Generate text using Amazon Bedrock LLMs."
+     description: str = (
+         "Generate text using Amazon Bedrock LLMs with the legacy ChatBedrock API. "
+         "For better compatibility, newer features, and improved conversation handling, "
+         "we recommend using Amazon Bedrock Converse instead."
+     )
      icon = "Amazon"
      name = "AmazonBedrockModel"
 
lfx/components/azure/azure_openai.py:
@@ -39,7 +39,7 @@ class AzureChatOpenAIComponent(LCModelComponent):
              required=True,
          ),
          MessageTextInput(name="azure_deployment", display_name="Deployment Name", required=True),
-         SecretStrInput(name="api_key", display_name="API Key", required=True),
+         SecretStrInput(name="api_key", display_name="Azure Chat OpenAI API Key", required=True),
          DropdownInput(
              name="api_version",
              display_name="API Version",
lfx/components/azure/azure_openai_embeddings.py:
@@ -50,7 +50,7 @@ class AzureOpenAIEmbeddingsComponent(LCModelComponent):
          ),
          SecretStrInput(
              name="api_key",
-             display_name="API Key",
+             display_name="Azure OpenAI API Key",
              required=True,
          ),
          IntInput(
lfx/components/clickhouse/clickhouse.py:
@@ -26,7 +26,7 @@ class ClickhouseVectorStoreComponent(LCVectorStoreComponent):
          StrInput(name="database", display_name="database", required=True),
          StrInput(name="table", display_name="Table name", required=True),
          StrInput(name="username", display_name="The ClickHouse user name.", required=True),
-         SecretStrInput(name="password", display_name="The password for username.", required=True),
+         SecretStrInput(name="password", display_name="Clickhouse Password", required=True),
          DropdownInput(
              name="index_type",
              display_name="index_type",
lfx/components/confluence/confluence.py:
@@ -29,7 +29,7 @@ class ConfluenceComponent(Component):
          ),
          SecretStrInput(
              name="api_key",
-             display_name="API Key",
+             display_name="Confluence API Key",
              required=True,
              info="Atlassian Key. Create at: https://id.atlassian.com/manage-profile/security/api-tokens",
          ),
lfx/components/crewai/crewai.py:
@@ -21,6 +21,7 @@ class CrewAIAgentComponent(Component):
      documentation: str = "https://docs.crewai.com/how-to/LLM-Connections/"
      icon = "CrewAI"
      legacy = True
+     replacement = "agents.Agent"
 
      inputs = [
          MultilineInput(name="role", display_name="Role", info="The role of the agent."),
lfx/components/crewai/hierarchical_crew.py:
@@ -10,6 +10,7 @@ class HierarchicalCrewComponent(BaseCrewComponent):
      documentation: str = "https://docs.crewai.com/how-to/Hierarchical/"
      icon = "CrewAI"
      legacy = True
+     replacement = "agents.Agent"
 
      inputs = [
          *BaseCrewComponent.get_base_inputs(),
lfx/components/crewai/hierarchical_task.py:
@@ -8,6 +8,7 @@ class HierarchicalTaskComponent(Component):
      description: str = "Each task must have a description, an expected output and an agent responsible for execution."
      icon = "CrewAI"
      legacy = True
+     replacement = "agents.Agent"
      inputs = [
          MultilineInput(
              name="task_description",
lfx/components/crewai/sequential_crew.py:
@@ -9,6 +9,7 @@ class SequentialCrewComponent(BaseCrewComponent):
      documentation: str = "https://docs.crewai.com/how-to/Sequential/"
      icon = "CrewAI"
      legacy = True
+     replacement = "agents.Agent"
 
      inputs = [
          *BaseCrewComponent.get_base_inputs(),
lfx/components/crewai/sequential_task.py:
@@ -8,6 +8,7 @@ class SequentialTaskComponent(Component):
      description: str = "Each task must have a description, an expected output and an agent responsible for execution."
      icon = "CrewAI"
      legacy = True
+     replacement = "agents.Agent"
      inputs = [
          MultilineInput(
              name="task_description",
lfx/components/crewai/sequential_task_agent.py:
@@ -9,6 +9,7 @@ class SequentialTaskAgentComponent(Component):
      documentation = "https://docs.crewai.com/how-to/LLM-Connections/"
      icon = "CrewAI"
      legacy = True
+     replacement = "agents.Agent"
 
      inputs = [
          # Agent inputs
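
The six CrewAI hunks above apply one identical change: each legacy component now names its suggested successor. A schematic of the pattern (the base class below is a stand-in, and how the frontend consumes `replacement` is an assumption based on these hunks):

    class Component:  # stand-in for lfx's Component base class
        pass

    class MyLegacyComponent(Component):
        display_name = "My Legacy Component"
        legacy = True                  # already present: marks the component as legacy
        replacement = "agents.Agent"   # new in this release: points users at the successor
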
lfx/components/data/api_request.py:
@@ -188,6 +188,8 @@ class APIRequestComponent(Component):
          """Process the body input into a valid dictionary."""
          if body is None:
              return {}
+         if hasattr(body, "data"):
+             body = body.data
          if isinstance(body, dict):
              return self._process_dict_body(body)
          if isinstance(body, str):
@@ -212,10 +214,18 @@
          processed_dict = {}
          try:
              for item in body:
-                 if not self._is_valid_key_value_item(item):
+                 # Unwrap Data objects
+                 current_item = item
+                 if hasattr(item, "data"):
+                     unwrapped_data = item.data
+                     # If the unwrapped data is a dict but not key-value format, use it directly
+                     if isinstance(unwrapped_data, dict) and not self._is_valid_key_value_item(unwrapped_data):
+                         return unwrapped_data
+                     current_item = unwrapped_data
+                 if not self._is_valid_key_value_item(current_item):
                      continue
-                 key = item["key"]
-                 value = self._parse_json_value(item["value"])
+                 key = current_item["key"]
+                 value = self._parse_json_value(current_item["value"])
                  processed_dict[key] = value
          except (KeyError, TypeError, ValueError) as e:
              self.log(f"Failed to process body list: {e}")
lfx/components/data/csv_to_data.py:
@@ -13,6 +13,7 @@ class CSVToDataComponent(Component):
      icon = "file-spreadsheet"
      name = "CSVtoData"
      legacy = True
+     replacement = ["data.File"]
 
      inputs = [
          FileInput(
lfx/components/data/file.py:
@@ -1,8 +1,7 @@
- """Enhanced file component with clearer structure and Docling isolation.
+ """Enhanced file component with Docling support and process isolation.
 
  Notes:
  -----
- - Functionality is preserved with minimal behavioral changes.
  - ALL Docling parsing/export runs in a separate OS process to prevent memory
    growth and native library state from impacting the main Langflow process.
  - Standard text/structured parsing continues to use existing BaseFileComponent
@@ -31,28 +30,22 @@ class FileComponent(BaseFileComponent):
      """File component with optional Docling processing (isolated in a subprocess)."""
 
      display_name = "File"
-     description = "Loads content from files with optional advanced document processing and export using Docling."
+     description = "Loads content from one or more files."
      documentation: str = "https://docs.langflow.org/components-data#file"
      icon = "file-text"
      name = "File"
 
      # Docling-supported/compatible extensions; TEXT_FILE_TYPES are supported by the base loader.
      VALID_EXTENSIONS = [
+         *TEXT_FILE_TYPES,
          "adoc",
          "asciidoc",
          "asc",
          "bmp",
-         "csv",
          "dotx",
          "dotm",
          "docm",
-         "docx",
-         "htm",
-         "html",
          "jpeg",
-         "json",
-         "md",
-         "pdf",
          "png",
          "potx",
          "ppsx",
@@ -61,13 +54,10 @@ class FileComponent(BaseFileComponent):
          "ppsm",
          "pptx",
          "tiff",
-         "txt",
          "xls",
          "xlsx",
          "xhtml",
-         "xml",
          "webp",
-         *TEXT_FILE_TYPES,
      ]
 
      # Fixed export settings used when markdown export is requested.
@@ -91,6 +81,7 @@ class FileComponent(BaseFileComponent):
              info=(
                  "Enable advanced document processing and export with Docling for PDFs, images, and office documents. "
                  "Available only for single file processing."
+                 "Note that advanced document processing can consume significant resources."
              ),
              show=False,
          ),
@@ -101,13 +92,14 @@ class FileComponent(BaseFileComponent):
              options=["standard", "vlm"],
              value="standard",
              advanced=True,
+             real_time_refresh=True,
          ),
          DropdownInput(
              name="ocr_engine",
              display_name="OCR Engine",
              info="OCR engine to use. Only available when pipeline is set to 'standard'.",
-             options=["", "easyocr"],
-             value="",
+             options=["None", "easyocr"],
+             value="easyocr",
              show=False,
              advanced=True,
          ),
@@ -190,16 +182,25 @@ class FileComponent(BaseFileComponent):
              if f in build_config:
                  build_config[f]["show"] = False
 
+         # Docling Processing
          elif field_name == "advanced_mode":
              for f in ("pipeline", "ocr_engine", "doc_key", "md_image_placeholder", "md_page_break_placeholder"):
                  if f in build_config:
                      build_config[f]["show"] = bool(field_value)
 
+         elif field_name == "pipeline":
+             if field_value == "standard":
+                 build_config["ocr_engine"]["show"] = True
+                 build_config["ocr_engine"]["value"] = "easyocr"
+             else:
+                 build_config["ocr_engine"]["show"] = False
+                 build_config["ocr_engine"]["value"] = "None"
+
          return build_config
 
      def update_outputs(self, frontend_node: dict[str, Any], field_name: str, field_value: Any) -> dict[str, Any]:  # noqa: ARG002
          """Dynamically show outputs based on file count/type and advanced mode."""
-         if field_name not in ["path", "advanced_mode"]:
+         if field_name not in ["path", "advanced_mode", "pipeline"]:
              return frontend_node
 
          template = frontend_node.get("template", {})
@@ -222,10 +223,10 @@ class FileComponent(BaseFileComponent):
          advanced_mode = frontend_node.get("template", {}).get("advanced_mode", {}).get("value", False)
          if advanced_mode:
              frontend_node["outputs"].append(
-                 Output(display_name="Structured Output", name="advanced", method="load_files_advanced"),
+                 Output(display_name="Structured Output", name="advanced_dataframe", method="load_files_dataframe"),
              )
              frontend_node["outputs"].append(
-                 Output(display_name="Markdown", name="markdown", method="load_files_markdown"),
+                 Output(display_name="Markdown", name="advanced_markdown", method="load_files_markdown"),
              )
              frontend_node["outputs"].append(
                  Output(display_name="File Path", name="path", method="load_files_path"),
@@ -296,10 +297,15 @@ class FileComponent(BaseFileComponent):
              "md_image_placeholder": str(self.md_image_placeholder),
              "md_page_break_placeholder": str(self.md_page_break_placeholder),
              "pipeline": str(self.pipeline),
-             "ocr_engine": str(self.ocr_engine) if getattr(self, "ocr_engine", "") else None,
+             "ocr_engine": (
+                 self.ocr_engine if self.ocr_engine and self.ocr_engine != "None" and self.pipeline != "vlm" else None
+             ),
          }
 
-         # The child is a tiny, self-contained script to keep memory/state isolated.
+         self.log(f"Starting Docling subprocess for file: {file_path}")
+         self.log(args)
+
+         # Child script for isolating the docling processing
          child_script = textwrap.dedent(
              r"""
              import json, sys
@@ -346,11 +352,15 @@
      raise ImportError(f"Docling imports failed: {e}") from e
 
  def create_converter(strategy, input_format, DocumentConverter, pipeline, ocr_engine):
-     if strategy == "latest" and pipeline == "standard":
+     # --- Standard PDF/IMAGE pipeline (your existing behavior), with optional OCR ---
+     if pipeline == "standard":
          try:
              from docling.datamodel.pipeline_options import PdfPipelineOptions  # type: ignore
              from docling.document_converter import PdfFormatOption  # type: ignore
+
              pipe = PdfPipelineOptions()
+             pipe.do_ocr = False
+
              if ocr_engine:
                  try:
                      from docling.models.factories import get_ocr_factory  # type: ignore
@@ -358,15 +368,39 @@
                      fac = get_ocr_factory(allow_external_plugins=False)
                      pipe.ocr_options = fac.create_options(kind=ocr_engine)
                  except Exception:
+                     # If OCR setup fails, disable it
                      pipe.do_ocr = False
+
              fmt = {}
              if hasattr(input_format, "PDF"):
                  fmt[getattr(input_format, "PDF")] = PdfFormatOption(pipeline_options=pipe)
              if hasattr(input_format, "IMAGE"):
                  fmt[getattr(input_format, "IMAGE")] = PdfFormatOption(pipeline_options=pipe)
+
              return DocumentConverter(format_options=fmt)
          except Exception:
              return DocumentConverter()
+
+     # --- Vision-Language Model (VLM) pipeline ---
+     if pipeline == "vlm":
+         try:
+             from docling.pipeline.vlm_pipeline import VlmPipeline
+             from docling.document_converter import PdfFormatOption  # type: ignore
+
+             vl_pipe = VlmPipelineOptions()
+
+             # VLM paths generally don't need OCR; keep OCR off by default here.
+             fmt = {}
+             if hasattr(input_format, "PDF"):
+                 fmt[getattr(input_format, "PDF")] = PdfFormatOption(pipeline_cls=VlmPipeline)
+             if hasattr(input_format, "IMAGE"):
+                 fmt[getattr(input_format, "IMAGE")] = PdfFormatOption(pipeline_cls=VlmPipeline)
+
+             return DocumentConverter(format_options=fmt)
+         except Exception:
+             return DocumentConverter()
+
+     # --- Fallback: default converter with no special options ---
      return DocumentConverter()
 
  def export_markdown(document, ImageRefMode, image_mode, img_ph, pg_ph):
@@ -565,13 +599,25 @@ class FileComponent(BaseFileComponent):
 
      # ------------------------------ Output helpers -----------------------------------
 
-     def load_files_advanced(self) -> DataFrame:
-         """Load files using advanced Docling processing and export to an advanced format."""
+     def load_files_helper(self) -> DataFrame:
+         result = self.load_files()
+
+         # Error condition - raise error if no text and an error is present
+         if not hasattr(result, "text"):
+             if hasattr(result, "error"):
+                 raise ValueError(result.error[0])
+             msg = "No content generated."
+             raise ValueError(msg)
+
+         return result
+
+     def load_files_dataframe(self) -> DataFrame:
+         """Load files using advanced Docling processing and export to DataFrame format."""
          self.markdown = False
-         return self.load_files()
+         return self.load_files_helper()
 
      def load_files_markdown(self) -> Message:
          """Load files using advanced Docling processing and export to Markdown format."""
          self.markdown = True
-         result = self.load_files()
+         result = self.load_files_helper()
          return Message(text=str(result.text[0]))
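
The thread running through these file.py hunks is that all Docling work happens in a short-lived child process that receives JSON on stdin and returns JSON on stdout, so native-library memory dies with the child. A minimal, self-contained sketch of that pattern (illustrative names; not the component's actual child script):

    import json
    import subprocess
    import sys
    import textwrap

    # Child: read JSON args from stdin, do the heavy work, write JSON to stdout.
    CHILD_SCRIPT = textwrap.dedent(
        """
        import json, sys
        args = json.loads(sys.stdin.read())
        # ... Docling parsing/export would happen here, isolated from the parent ...
        json.dump({"ok": True, "echo": args}, sys.stdout)
        """
    )

    def run_isolated(args: dict, timeout: float = 60.0) -> dict:
        # A fresh interpreter per call keeps memory and native state out of the parent.
        proc = subprocess.run(
            [sys.executable, "-c", CHILD_SCRIPT],
            input=json.dumps(args),
            capture_output=True,
            text=True,
            timeout=timeout,
            check=True,
        )
        return json.loads(proc.stdout)

    print(run_isolated({"file_path": "example.pdf", "pipeline": "standard", "ocr_engine": "easyocr"}))
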
lfx/components/data/json_to_data.py:
@@ -16,6 +16,7 @@ class JSONToDataComponent(Component):
      icon = "braces"
      name = "JSONtoData"
      legacy = True
+     replacement = ["data.File"]
 
      inputs = [
          FileInput(
lfx/components/datastax/astra_db.py:
@@ -22,7 +22,7 @@ class AstraDBChatMemory(LCChatMemoryComponent):
          ),
          SecretStrInput(
              name="api_endpoint",
-             display_name="API Endpoint",
+             display_name="Astra DB API Endpoint",
              info="API endpoint URL for the Astra DB service.",
              value="ASTRA_DB_API_ENDPOINT",
              required=True,
@@ -49,6 +49,7 @@ class AstraDBChatMemory(LCChatMemoryComponent):
 
      def build_message_history(self) -> Memory:
          try:
+             from astrapy.admin import parse_api_endpoint
              from langchain_astradb.chat_message_histories import AstraDBChatMessageHistory
 
          except ImportError as e:
lfx/components/datastax/astra_vectorize.py:
@@ -6,15 +6,13 @@ from lfx.template.field.base import Output
 
 
  class AstraVectorizeComponent(Component):
-     display_name: str = "Astra Vectorize [DEPRECATED]"
-     description: str = (
-         "Configuration options for Astra Vectorize server-side embeddings. "
-         "This component is deprecated. Please use the Astra DB Component directly."
-     )
+     display_name: str = "Astra Vectorize"
+     description: str = "Configuration options for Astra Vectorize server-side embeddings. "
      documentation: str = "https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html"
      legacy = True
      icon = "AstraDB"
      name = "AstraVectorize"
+     replacement = ["datastax.AstraDB"]
 
      VECTORIZE_PROVIDERS_MAPPING = {
          "Azure OpenAI": ["azureOpenAI", ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"]],
lfx/components/datastax/astradb_tool.py:
@@ -3,7 +3,6 @@ from datetime import datetime, timezone
  from typing import Any
 
  from astrapy import Collection, DataAPIClient, Database
- from astrapy.admin import parse_api_endpoint
  from langchain_core.tools import StructuredTool, Tool
  from pydantic import BaseModel, Field, create_model
 
@@ -192,6 +191,11 @@ class AstraDBToolComponent(LCToolComponent):
      _cached_collection: Collection | None = None
 
      def _build_collection(self):
+         try:
+             from astrapy.admin import parse_api_endpoint
+         except ImportError as e:
+             msg = "Could not import Astra DB integration package. Please install it with `uv pip install astrapy`."
+             raise ImportError(msg) from e
          if self._cached_collection:
              return self._cached_collection
lfx/components/datastax/astradb_vectorstore.py:
@@ -192,7 +192,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
              input_types=["Embeddings"],
              info="Specify the Embedding Model. Not required for Astra Vectorize collections.",
              required=False,
-             show=False,
+             show=True,
          ),
          *LCVectorStoreComponent.inputs,
          DropdownInput(
@@ -790,6 +790,13 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
          build_config["api_endpoint"]["options"] = []
          build_config["api_endpoint"]["value"] = ""
 
+         # Reset hybrid search options
+         build_config["reranker"]["options"] = []
+         build_config["reranker"]["value"] = ""
+         build_config["reranker"]["show"] = False
+         build_config["lexical_terms"]["value"] = ""
+         build_config["lexical_terms"]["show"] = False
+
          # Reset collection configuration
          collection_config = build_config["collection_name"]
          collection_config.update({"options": [], "options_metadata": [], "value": "", "show": False})