lfx-nightly 0.1.12.dev14__py3-none-any.whl → 0.1.12.dev16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lfx-nightly might be problematic.
- lfx/base/agents/events.py +40 -29
- lfx/base/constants.py +1 -1
- lfx/base/data/docling_utils.py +43 -8
- lfx/base/data/utils.py +3 -3
- lfx/base/knowledge_bases/__init__.py +3 -0
- lfx/base/knowledge_bases/knowledge_base_utils.py +137 -0
- lfx/base/models/anthropic_constants.py +3 -1
- lfx/base/models/model_input_constants.py +1 -1
- lfx/base/vectorstores/vector_store_connection_decorator.py +1 -1
- lfx/components/agentql/agentql_api.py +1 -1
- lfx/components/agents/agent.py +62 -17
- lfx/components/agents/mcp_component.py +11 -1
- lfx/components/aiml/aiml.py +4 -1
- lfx/components/amazon/amazon_bedrock_converse.py +196 -0
- lfx/components/amazon/amazon_bedrock_model.py +5 -1
- lfx/components/azure/azure_openai.py +1 -1
- lfx/components/azure/azure_openai_embeddings.py +1 -1
- lfx/components/chroma/chroma.py +4 -2
- lfx/components/clickhouse/clickhouse.py +1 -1
- lfx/components/confluence/confluence.py +1 -1
- lfx/components/crewai/crewai.py +1 -0
- lfx/components/crewai/hierarchical_crew.py +1 -0
- lfx/components/crewai/hierarchical_task.py +1 -0
- lfx/components/crewai/sequential_crew.py +1 -0
- lfx/components/crewai/sequential_task.py +1 -0
- lfx/components/crewai/sequential_task_agent.py +1 -0
- lfx/components/data/api_request.py +13 -3
- lfx/components/data/csv_to_data.py +1 -0
- lfx/components/data/file.py +71 -25
- lfx/components/data/json_to_data.py +1 -0
- lfx/components/datastax/astra_db.py +2 -1
- lfx/components/datastax/astra_vectorize.py +3 -5
- lfx/components/datastax/astradb_tool.py +5 -1
- lfx/components/datastax/astradb_vectorstore.py +8 -1
- lfx/components/deactivated/chat_litellm_model.py +1 -1
- lfx/components/deactivated/metal.py +1 -1
- lfx/components/docling/docling_inline.py +23 -9
- lfx/components/elastic/elasticsearch.py +1 -1
- lfx/components/elastic/opensearch.py +1 -1
- lfx/components/embeddings/similarity.py +1 -0
- lfx/components/embeddings/text_embedder.py +1 -0
- lfx/components/firecrawl/firecrawl_crawl_api.py +1 -1
- lfx/components/firecrawl/firecrawl_extract_api.py +1 -1
- lfx/components/firecrawl/firecrawl_map_api.py +1 -1
- lfx/components/firecrawl/firecrawl_scrape_api.py +1 -1
- lfx/components/google/gmail.py +1 -0
- lfx/components/google/google_generative_ai_embeddings.py +1 -1
- lfx/components/helpers/memory.py +8 -6
- lfx/components/helpers/output_parser.py +1 -0
- lfx/components/helpers/store_message.py +1 -0
- lfx/components/huggingface/huggingface.py +3 -1
- lfx/components/huggingface/huggingface_inference_api.py +1 -1
- lfx/components/ibm/watsonx.py +1 -1
- lfx/components/ibm/watsonx_embeddings.py +1 -1
- lfx/components/icosacomputing/combinatorial_reasoner.py +1 -1
- lfx/components/input_output/chat.py +0 -27
- lfx/components/input_output/chat_output.py +3 -27
- lfx/components/knowledge_bases/__init__.py +34 -0
- lfx/components/knowledge_bases/ingestion.py +686 -0
- lfx/components/knowledge_bases/retrieval.py +256 -0
- lfx/components/langchain_utilities/langchain_hub.py +1 -1
- lfx/components/langwatch/langwatch.py +1 -1
- lfx/components/logic/conditional_router.py +40 -3
- lfx/components/logic/data_conditional_router.py +1 -0
- lfx/components/logic/flow_tool.py +2 -1
- lfx/components/logic/pass_message.py +1 -0
- lfx/components/logic/sub_flow.py +2 -1
- lfx/components/milvus/milvus.py +1 -1
- lfx/components/olivya/olivya.py +1 -1
- lfx/components/processing/alter_metadata.py +1 -0
- lfx/components/processing/combine_text.py +1 -0
- lfx/components/processing/create_data.py +1 -0
- lfx/components/processing/data_to_dataframe.py +1 -0
- lfx/components/processing/extract_key.py +1 -0
- lfx/components/processing/filter_data.py +1 -0
- lfx/components/processing/filter_data_values.py +1 -0
- lfx/components/processing/json_cleaner.py +1 -0
- lfx/components/processing/merge_data.py +1 -0
- lfx/components/processing/message_to_data.py +1 -0
- lfx/components/processing/parse_data.py +1 -0
- lfx/components/processing/parse_dataframe.py +1 -0
- lfx/components/processing/parse_json_data.py +1 -0
- lfx/components/processing/python_repl_core.py +2 -2
- lfx/components/processing/regex.py +1 -0
- lfx/components/processing/select_data.py +1 -0
- lfx/components/processing/structured_output.py +7 -3
- lfx/components/processing/update_data.py +1 -0
- lfx/components/prototypes/__init__.py +8 -7
- lfx/components/qdrant/qdrant.py +1 -1
- lfx/components/redis/redis_chat.py +1 -1
- lfx/components/tools/__init__.py +0 -6
- lfx/components/tools/calculator.py +2 -1
- lfx/components/tools/python_code_structured_tool.py +1 -0
- lfx/components/tools/python_repl.py +2 -1
- lfx/components/tools/search_api.py +2 -1
- lfx/components/tools/serp_api.py +2 -1
- lfx/components/tools/tavily_search_tool.py +1 -0
- lfx/components/tools/wikidata_api.py +2 -1
- lfx/components/tools/wikipedia_api.py +2 -1
- lfx/components/tools/yahoo_finance.py +2 -1
- lfx/components/twelvelabs/video_embeddings.py +1 -1
- lfx/components/upstash/upstash.py +1 -1
- lfx/components/vectorstores/astradb_graph.py +8 -1
- lfx/components/vectorstores/local_db.py +1 -0
- lfx/components/vectorstores/weaviate.py +1 -1
- lfx/components/wolframalpha/wolfram_alpha_api.py +1 -1
- lfx/components/zep/zep.py +2 -1
- lfx/custom/attributes.py +1 -0
- lfx/custom/validate.py +1 -1
- lfx/graph/graph/base.py +61 -4
- lfx/inputs/inputs.py +1 -0
- lfx/log/logger.py +31 -11
- lfx/schema/message.py +6 -1
- lfx/schema/schema.py +4 -0
- lfx/services/__init__.py +3 -0
- lfx/services/mcp_composer/__init__.py +6 -0
- lfx/services/mcp_composer/factory.py +16 -0
- lfx/services/mcp_composer/service.py +599 -0
- lfx/services/schema.py +1 -0
- lfx/services/settings/auth.py +18 -15
- lfx/services/settings/base.py +38 -0
- lfx/services/settings/constants.py +4 -1
- lfx/services/settings/feature_flags.py +0 -1
- lfx/template/frontend_node/base.py +2 -0
- lfx/utils/image.py +1 -1
- {lfx_nightly-0.1.12.dev14.dist-info → lfx_nightly-0.1.12.dev16.dist-info}/METADATA +1 -1
- {lfx_nightly-0.1.12.dev14.dist-info → lfx_nightly-0.1.12.dev16.dist-info}/RECORD +129 -121
- lfx/components/datastax/astradb.py +0 -1285
- {lfx_nightly-0.1.12.dev14.dist-info → lfx_nightly-0.1.12.dev16.dist-info}/WHEEL +0 -0
- {lfx_nightly-0.1.12.dev14.dist-info → lfx_nightly-0.1.12.dev16.dist-info}/entry_points.txt +0 -0
lfx/components/amazon/amazon_bedrock_converse.py
ADDED
@@ -0,0 +1,196 @@
+from langflow.base.models.aws_constants import AWS_REGIONS, AWS_MODEL_IDs
+from langflow.base.models.model import LCModelComponent
+from langflow.field_typing import LanguageModel
+from langflow.inputs.inputs import BoolInput, FloatInput, IntInput, MessageTextInput, SecretStrInput
+from langflow.io import DictInput, DropdownInput
+
+
+class AmazonBedrockConverseComponent(LCModelComponent):
+    display_name: str = "Amazon Bedrock Converse"
+    description: str = (
+        "Generate text using Amazon Bedrock LLMs with the modern Converse API "
+        "for improved conversation handling. We recommend the Converse API for users "
+        "who do not need to use custom models. It can be accessed using ChatBedrockConverse."
+    )
+    icon = "Amazon"
+    name = "AmazonBedrockConverseModel"
+    beta = True
+
+    inputs = [
+        *LCModelComponent._base_inputs,
+        DropdownInput(
+            name="model_id",
+            display_name="Model ID",
+            options=AWS_MODEL_IDs,
+            value="anthropic.claude-3-5-sonnet-20241022-v2:0",
+            info="List of available model IDs to choose from.",
+        ),
+        SecretStrInput(
+            name="aws_access_key_id",
+            display_name="AWS Access Key ID",
+            info="The access key for your AWS account. "
+            "Usually set in Python code as the environment variable 'AWS_ACCESS_KEY_ID'.",
+            value="AWS_ACCESS_KEY_ID",
+            required=True,
+        ),
+        SecretStrInput(
+            name="aws_secret_access_key",
+            display_name="AWS Secret Access Key",
+            info="The secret key for your AWS account. "
+            "Usually set in Python code as the environment variable 'AWS_SECRET_ACCESS_KEY'.",
+            value="AWS_SECRET_ACCESS_KEY",
+            required=True,
+        ),
+        SecretStrInput(
+            name="aws_session_token",
+            display_name="AWS Session Token",
+            advanced=True,
+            info="The session key for your AWS account. "
+            "Only needed for temporary credentials. "
+            "Usually set in Python code as the environment variable 'AWS_SESSION_TOKEN'.",
+            load_from_db=False,
+        ),
+        SecretStrInput(
+            name="credentials_profile_name",
+            display_name="Credentials Profile Name",
+            advanced=True,
+            info="The name of the profile to use from your "
+            "~/.aws/credentials file. "
+            "If not provided, the default profile will be used.",
+            load_from_db=False,
+        ),
+        DropdownInput(
+            name="region_name",
+            display_name="Region Name",
+            value="us-east-1",
+            options=AWS_REGIONS,
+            info="The AWS region where your Bedrock resources are located.",
+        ),
+        MessageTextInput(
+            name="endpoint_url",
+            display_name="Endpoint URL",
+            advanced=True,
+            info="The URL of the Bedrock endpoint to use.",
+        ),
+        # Model-specific parameters for fine control
+        FloatInput(
+            name="temperature",
+            display_name="Temperature",
+            value=0.7,
+            info="Controls randomness in output. Higher values make output more random.",
+            advanced=True,
+        ),
+        IntInput(
+            name="max_tokens",
+            display_name="Max Tokens",
+            value=4096,
+            info="Maximum number of tokens to generate.",
+            advanced=True,
+        ),
+        FloatInput(
+            name="top_p",
+            display_name="Top P",
+            value=0.9,
+            info="Nucleus sampling parameter. Controls diversity of output.",
+            advanced=True,
+        ),
+        IntInput(
+            name="top_k",
+            display_name="Top K",
+            value=250,
+            info="Limits the number of highest probability vocabulary tokens to consider. "
+            "Note: Not all models support top_k. Use 'Additional Model Fields' for manual configuration if needed.",
+            advanced=True,
+        ),
+        BoolInput(
+            name="disable_streaming",
+            display_name="Disable Streaming",
+            value=False,
+            info="If True, disables streaming responses. Useful for batch processing.",
+            advanced=True,
+        ),
+        DictInput(
+            name="additional_model_fields",
+            display_name="Additional Model Fields",
+            advanced=True,
+            is_list=True,
+            info="Additional model-specific parameters for fine-tuning behavior.",
+        ),
+    ]
+
+    def build_model(self) -> LanguageModel:  # type: ignore[type-var]
+        try:
+            from langchain_aws.chat_models.bedrock_converse import ChatBedrockConverse
+        except ImportError as e:
+            msg = "langchain_aws is not installed. Please install it with `pip install langchain_aws`."
+            raise ImportError(msg) from e
+
+        # Prepare initialization parameters
+        init_params = {
+            "model": self.model_id,
+            "region_name": self.region_name,
+        }
+
+        # Add AWS credentials if provided
+        if self.aws_access_key_id:
+            init_params["aws_access_key_id"] = self.aws_access_key_id
+        if self.aws_secret_access_key:
+            init_params["aws_secret_access_key"] = self.aws_secret_access_key
+        if self.aws_session_token:
+            init_params["aws_session_token"] = self.aws_session_token
+        if self.credentials_profile_name:
+            init_params["credentials_profile_name"] = self.credentials_profile_name
+        if self.endpoint_url:
+            init_params["endpoint_url"] = self.endpoint_url
+
+        # Add model parameters directly as supported by ChatBedrockConverse
+        if hasattr(self, "temperature") and self.temperature is not None:
+            init_params["temperature"] = self.temperature
+        if hasattr(self, "max_tokens") and self.max_tokens is not None:
+            init_params["max_tokens"] = self.max_tokens
+        if hasattr(self, "top_p") and self.top_p is not None:
+            init_params["top_p"] = self.top_p
+
+        # Handle streaming - only disable if explicitly requested
+        if hasattr(self, "disable_streaming") and self.disable_streaming:
+            init_params["disable_streaming"] = True
+
+        # Handle additional model request fields carefully
+        # Based on the error, inferenceConfig should not be passed as additional fields for some models
+        additional_model_request_fields = {}
+
+        # Only add top_k if user explicitly provided additional fields or if needed for specific models
+        if hasattr(self, "additional_model_fields") and self.additional_model_fields:
+            for field in self.additional_model_fields:
+                if isinstance(field, dict):
+                    additional_model_request_fields.update(field)
+
+        # For now, don't automatically add inferenceConfig for top_k to avoid validation errors
+        # Users can manually add it via additional_model_fields if their model supports it
+
+        # Only add if we have actual additional fields
+        if additional_model_request_fields:
+            init_params["additional_model_request_fields"] = additional_model_request_fields
+
+        try:
+            output = ChatBedrockConverse(**init_params)
+        except Exception as e:
+            # Provide helpful error message with fallback suggestions
+            error_details = str(e)
+            if "validation error" in error_details.lower():
+                msg = (
+                    f"ChatBedrockConverse validation error: {error_details}. "
+                    f"This may be due to incompatible parameters for model '{self.model_id}'. "
+                    f"Consider adjusting the model parameters or trying the legacy Amazon Bedrock component."
+                )
+            elif "converse api" in error_details.lower():
+                msg = (
+                    f"Converse API error: {error_details}. "
+                    f"The model '{self.model_id}' may not support the Converse API. "
+                    f"Try using the legacy Amazon Bedrock component instead."
+                )
+            else:
+                msg = f"Could not initialize ChatBedrockConverse: {error_details}"
+            raise ValueError(msg) from e
+
+        return output
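For orientation, here is a minimal sketch of what `build_model` produces, assuming `langchain_aws` is installed and AWS credentials are resolvable from the environment; the model ID and region are just the component's defaults, and the surrounding variable names are ours, not part of the package:

# Minimal sketch: instantiate ChatBedrockConverse the way build_model() does.
# Assumes langchain_aws is installed and AWS credentials are available in the
# environment (the component would pass them explicitly from its inputs).
from langchain_aws.chat_models.bedrock_converse import ChatBedrockConverse

init_params = {
    "model": "anthropic.claude-3-5-sonnet-20241022-v2:0",  # component default
    "region_name": "us-east-1",  # component default
    "temperature": 0.7,
    "max_tokens": 4096,
    "top_p": 0.9,
}
llm = ChatBedrockConverse(**init_params)
print(llm.invoke("Say hello in one sentence.").content)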
lfx/components/amazon/amazon_bedrock_model.py
CHANGED
@@ -7,7 +7,11 @@ from lfx.io import DictInput, DropdownInput
 
 class AmazonBedrockComponent(LCModelComponent):
     display_name: str = "Amazon Bedrock"
-    description: str =
+    description: str = (
+        "Generate text using Amazon Bedrock LLMs with the legacy ChatBedrock API. "
+        "For better compatibility, newer features, and improved conversation handling, "
+        "we recommend using Amazon Bedrock Converse instead."
+    )
     icon = "Amazon"
     name = "AmazonBedrockModel"
 
lfx/components/azure/azure_openai.py
CHANGED
@@ -39,7 +39,7 @@ class AzureChatOpenAIComponent(LCModelComponent):
             required=True,
         ),
         MessageTextInput(name="azure_deployment", display_name="Deployment Name", required=True),
-        SecretStrInput(name="api_key", display_name="API Key", required=True),
+        SecretStrInput(name="api_key", display_name="Azure Chat OpenAI API Key", required=True),
         DropdownInput(
             name="api_version",
             display_name="API Version",
lfx/components/chroma/chroma.py
CHANGED
@@ -121,7 +121,8 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent):
         )
 
         self._add_documents_to_vector_store(chroma)
-
+        limit = int(self.limit) if self.limit is not None and str(self.limit).strip() else None
+        self.status = chroma_collection_to_data(chroma.get(limit=limit))
         return chroma
 
     def _add_documents_to_vector_store(self, vector_store: "Chroma") -> None:
@@ -138,7 +139,8 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent):
         if self.allow_duplicates:
             stored_data = []
         else:
-
+            limit = int(self.limit) if self.limit is not None and str(self.limit).strip() else None
+            stored_data = chroma_collection_to_data(vector_store.get(limit=limit))
             for value in deepcopy(stored_data):
                 del value.id
                 stored_documents_without_id.append(value)
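Both hunks above introduce the same guard before calling `get(limit=...)`. A small standalone sketch of its behavior (the helper name here is ours, not the component's):

# Sketch of the limit-coercion guard: None and blank strings mean "no limit",
# anything else is coerced to int so Chroma's get(limit=...) sees a valid type.
def coerce_limit(raw):
    return int(raw) if raw is not None and str(raw).strip() else None

assert coerce_limit(None) is None
assert coerce_limit("") is None
assert coerce_limit("   ") is None
assert coerce_limit("10") == 10
assert coerce_limit(5) == 5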
lfx/components/clickhouse/clickhouse.py
CHANGED
@@ -26,7 +26,7 @@ class ClickhouseVectorStoreComponent(LCVectorStoreComponent):
         StrInput(name="database", display_name="database", required=True),
         StrInput(name="table", display_name="Table name", required=True),
         StrInput(name="username", display_name="The ClickHouse user name.", required=True),
-        SecretStrInput(name="password", display_name="
+        SecretStrInput(name="password", display_name="Clickhouse Password", required=True),
         DropdownInput(
             name="index_type",
             display_name="index_type",
lfx/components/confluence/confluence.py
CHANGED
@@ -29,7 +29,7 @@ class ConfluenceComponent(Component):
         ),
         SecretStrInput(
             name="api_key",
-            display_name="API Key",
+            display_name="Confluence API Key",
             required=True,
             info="Atlassian Key. Create at: https://id.atlassian.com/manage-profile/security/api-tokens",
         ),
lfx/components/crewai/crewai.py
CHANGED
@@ -21,6 +21,7 @@ class CrewAIAgentComponent(Component):
     documentation: str = "https://docs.crewai.com/how-to/LLM-Connections/"
     icon = "CrewAI"
     legacy = True
+    replacement = "agents.Agent"
 
     inputs = [
         MultilineInput(name="role", display_name="Role", info="The role of the agent."),
lfx/components/crewai/hierarchical_task.py
CHANGED
@@ -8,6 +8,7 @@ class HierarchicalTaskComponent(Component):
     description: str = "Each task must have a description, an expected output and an agent responsible for execution."
     icon = "CrewAI"
     legacy = True
+    replacement = "agents.Agent"
     inputs = [
         MultilineInput(
             name="task_description",
lfx/components/crewai/sequential_task.py
CHANGED
@@ -8,6 +8,7 @@ class SequentialTaskComponent(Component):
     description: str = "Each task must have a description, an expected output and an agent responsible for execution."
     icon = "CrewAI"
     legacy = True
+    replacement = "agents.Agent"
     inputs = [
         MultilineInput(
             name="task_description",
lfx/components/data/api_request.py
CHANGED
@@ -188,6 +188,8 @@ class APIRequestComponent(Component):
         """Process the body input into a valid dictionary."""
         if body is None:
             return {}
+        if hasattr(body, "data"):
+            body = body.data
         if isinstance(body, dict):
             return self._process_dict_body(body)
         if isinstance(body, str):
@@ -212,10 +214,18 @@ class APIRequestComponent(Component):
         processed_dict = {}
         try:
             for item in body:
-
+                # Unwrap Data objects
+                current_item = item
+                if hasattr(item, "data"):
+                    unwrapped_data = item.data
+                    # If the unwrapped data is a dict but not key-value format, use it directly
+                    if isinstance(unwrapped_data, dict) and not self._is_valid_key_value_item(unwrapped_data):
+                        return unwrapped_data
+                    current_item = unwrapped_data
+                if not self._is_valid_key_value_item(current_item):
                     continue
-                key =
-                value = self._parse_json_value(
+                key = current_item["key"]
+                value = self._parse_json_value(current_item["value"])
                 processed_dict[key] = value
         except (KeyError, TypeError, ValueError) as e:
             self.log(f"Failed to process body list: {e}")
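The two hunks above make the body parser accept Langflow `Data` wrappers as well as plain dicts. A self-contained sketch of that unwrapping logic; the diff does not show `_is_valid_key_value_item`, so the key/value check below is an assumed approximation:

# Stand-in for Langflow's Data wrapper, which exposes a .data attribute.
class Data:
    def __init__(self, data):
        self.data = data

def is_valid_key_value_item(item) -> bool:
    # Assumed shape; the component's real _is_valid_key_value_item is not shown.
    return isinstance(item, dict) and "key" in item and "value" in item

def process_body_list(body):
    processed = {}
    for item in body:
        current = item
        if hasattr(item, "data"):
            unwrapped = item.data
            # A plain dict that is not key/value-shaped is used directly as the body.
            if isinstance(unwrapped, dict) and not is_valid_key_value_item(unwrapped):
                return unwrapped
            current = unwrapped
        if not is_valid_key_value_item(current):
            continue
        processed[current["key"]] = current["value"]
    return processed

assert process_body_list([Data({"key": "a", "value": 1})]) == {"a": 1}
assert process_body_list([Data({"whole": "payload"})]) == {"whole": "payload"}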
lfx/components/data/file.py
CHANGED
@@ -1,8 +1,7 @@
-"""Enhanced file component with
+"""Enhanced file component with Docling support and process isolation.
 
 Notes:
 -----
-- Functionality is preserved with minimal behavioral changes.
 - ALL Docling parsing/export runs in a separate OS process to prevent memory
   growth and native library state from impacting the main Langflow process.
 - Standard text/structured parsing continues to use existing BaseFileComponent
@@ -31,28 +30,22 @@ class FileComponent(BaseFileComponent):
     """File component with optional Docling processing (isolated in a subprocess)."""
 
     display_name = "File"
-    description = "Loads content from
+    description = "Loads content from one or more files."
     documentation: str = "https://docs.langflow.org/components-data#file"
     icon = "file-text"
     name = "File"
 
     # Docling-supported/compatible extensions; TEXT_FILE_TYPES are supported by the base loader.
     VALID_EXTENSIONS = [
+        *TEXT_FILE_TYPES,
         "adoc",
         "asciidoc",
         "asc",
         "bmp",
-        "csv",
         "dotx",
         "dotm",
         "docm",
-        "docx",
-        "htm",
-        "html",
         "jpeg",
-        "json",
-        "md",
-        "pdf",
         "png",
         "potx",
         "ppsx",
@@ -61,13 +54,10 @@ class FileComponent(BaseFileComponent):
         "ppsm",
         "pptx",
         "tiff",
-        "txt",
         "xls",
         "xlsx",
         "xhtml",
-        "xml",
         "webp",
-        *TEXT_FILE_TYPES,
     ]
 
     # Fixed export settings used when markdown export is requested.
@@ -91,6 +81,7 @@ class FileComponent(BaseFileComponent):
             info=(
                 "Enable advanced document processing and export with Docling for PDFs, images, and office documents. "
                 "Available only for single file processing."
+                "Note that advanced document processing can consume significant resources."
             ),
             show=False,
         ),
@@ -101,13 +92,14 @@ class FileComponent(BaseFileComponent):
             options=["standard", "vlm"],
             value="standard",
             advanced=True,
+            real_time_refresh=True,
         ),
         DropdownInput(
             name="ocr_engine",
             display_name="OCR Engine",
             info="OCR engine to use. Only available when pipeline is set to 'standard'.",
-            options=["", "easyocr"],
-            value="",
+            options=["None", "easyocr"],
+            value="easyocr",
             show=False,
             advanced=True,
         ),
@@ -190,16 +182,25 @@
                 if f in build_config:
                     build_config[f]["show"] = False
 
+        # Docling Processing
         elif field_name == "advanced_mode":
             for f in ("pipeline", "ocr_engine", "doc_key", "md_image_placeholder", "md_page_break_placeholder"):
                 if f in build_config:
                     build_config[f]["show"] = bool(field_value)
 
+        elif field_name == "pipeline":
+            if field_value == "standard":
+                build_config["ocr_engine"]["show"] = True
+                build_config["ocr_engine"]["value"] = "easyocr"
+            else:
+                build_config["ocr_engine"]["show"] = False
+                build_config["ocr_engine"]["value"] = "None"
+
         return build_config
 
     def update_outputs(self, frontend_node: dict[str, Any], field_name: str, field_value: Any) -> dict[str, Any]:  # noqa: ARG002
         """Dynamically show outputs based on file count/type and advanced mode."""
-        if field_name not in ["path", "advanced_mode"]:
+        if field_name not in ["path", "advanced_mode", "pipeline"]:
            return frontend_node
 
         template = frontend_node.get("template", {})
@@ -222,10 +223,10 @@
         advanced_mode = frontend_node.get("template", {}).get("advanced_mode", {}).get("value", False)
         if advanced_mode:
             frontend_node["outputs"].append(
-                Output(display_name="Structured Output", name="
+                Output(display_name="Structured Output", name="advanced_dataframe", method="load_files_dataframe"),
             )
             frontend_node["outputs"].append(
-                Output(display_name="Markdown", name="
+                Output(display_name="Markdown", name="advanced_markdown", method="load_files_markdown"),
             )
             frontend_node["outputs"].append(
                 Output(display_name="File Path", name="path", method="load_files_path"),
@@ -296,10 +297,15 @@
             "md_image_placeholder": str(self.md_image_placeholder),
             "md_page_break_placeholder": str(self.md_page_break_placeholder),
             "pipeline": str(self.pipeline),
-            "ocr_engine":
+            "ocr_engine": (
+                self.ocr_engine if self.ocr_engine and self.ocr_engine != "None" and self.pipeline != "vlm" else None
+            ),
         }
 
-
+        self.log(f"Starting Docling subprocess for file: {file_path}")
+        self.log(args)
+
+        # Child script for isolating the docling processing
         child_script = textwrap.dedent(
             r"""
             import json, sys
@@ -346,11 +352,15 @@
                 raise ImportError(f"Docling imports failed: {e}") from e
 
             def create_converter(strategy, input_format, DocumentConverter, pipeline, ocr_engine):
-
+                # --- Standard PDF/IMAGE pipeline (your existing behavior), with optional OCR ---
+                if pipeline == "standard":
                     try:
                         from docling.datamodel.pipeline_options import PdfPipelineOptions  # type: ignore
                         from docling.document_converter import PdfFormatOption  # type: ignore
+
                         pipe = PdfPipelineOptions()
+                        pipe.do_ocr = False
+
                         if ocr_engine:
                             try:
                                 from docling.models.factories import get_ocr_factory  # type: ignore
@@ -358,15 +368,39 @@
                                 fac = get_ocr_factory(allow_external_plugins=False)
                                 pipe.ocr_options = fac.create_options(kind=ocr_engine)
                             except Exception:
+                                # If OCR setup fails, disable it
                                 pipe.do_ocr = False
+
                         fmt = {}
                         if hasattr(input_format, "PDF"):
                             fmt[getattr(input_format, "PDF")] = PdfFormatOption(pipeline_options=pipe)
                         if hasattr(input_format, "IMAGE"):
                             fmt[getattr(input_format, "IMAGE")] = PdfFormatOption(pipeline_options=pipe)
+
                         return DocumentConverter(format_options=fmt)
                     except Exception:
                         return DocumentConverter()
+
+                # --- Vision-Language Model (VLM) pipeline ---
+                if pipeline == "vlm":
+                    try:
+                        from docling.pipeline.vlm_pipeline import VlmPipeline
+                        from docling.document_converter import PdfFormatOption  # type: ignore
+
+                        vl_pipe = VlmPipelineOptions()
+
+                        # VLM paths generally don't need OCR; keep OCR off by default here.
+                        fmt = {}
+                        if hasattr(input_format, "PDF"):
+                            fmt[getattr(input_format, "PDF")] = PdfFormatOption(pipeline_cls=VlmPipeline)
+                        if hasattr(input_format, "IMAGE"):
+                            fmt[getattr(input_format, "IMAGE")] = PdfFormatOption(pipeline_cls=VlmPipeline)
+
+                        return DocumentConverter(format_options=fmt)
+                    except Exception:
+                        return DocumentConverter()
+
+                # --- Fallback: default converter with no special options ---
                 return DocumentConverter()
 
             def export_markdown(document, ImageRefMode, image_mode, img_ph, pg_ph):
@@ -565,13 +599,25 @@
 
     # ------------------------------ Output helpers -----------------------------------
 
-    def
-
+    def load_files_helper(self) -> DataFrame:
+        result = self.load_files()
+
+        # Error condition - raise error if no text and an error is present
+        if not hasattr(result, "text"):
+            if hasattr(result, "error"):
+                raise ValueError(result.error[0])
+            msg = "No content generated."
+            raise ValueError(msg)
+
+        return result
+
+    def load_files_dataframe(self) -> DataFrame:
+        """Load files using advanced Docling processing and export to DataFrame format."""
         self.markdown = False
-        return self.
+        return self.load_files_helper()
 
     def load_files_markdown(self) -> Message:
         """Load files using advanced Docling processing and export to Markdown format."""
         self.markdown = True
-        result = self.
+        result = self.load_files_helper()
         return Message(text=str(result.text[0]))
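The component builds an `args` dict, serializes it, and runs all Docling work in a child Python process so native-library state and memory stay out of the main process. A minimal sketch of that isolation pattern (the child body and argument names here are illustrative, not the component's actual script):

import json
import subprocess
import sys
import textwrap

# The parent sends JSON on stdin and reads a JSON result from stdout.
child_script = textwrap.dedent(
    r"""
    import json, sys
    args = json.loads(sys.stdin.read())
    # ... heavy Docling conversion would happen here, inside the child ...
    print(json.dumps({"ok": True, "file": args["file_path"]}))
    """
)

args = {"file_path": "example.pdf", "pipeline": "standard", "ocr_engine": "easyocr"}
proc = subprocess.run(
    [sys.executable, "-c", child_script],
    input=json.dumps(args),
    capture_output=True,
    text=True,
    check=True,
)
print(json.loads(proc.stdout))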
lfx/components/datastax/astra_db.py
CHANGED
@@ -22,7 +22,7 @@ class AstraDBChatMemory(LCChatMemoryComponent):
         ),
         SecretStrInput(
             name="api_endpoint",
-            display_name="API Endpoint",
+            display_name="Astra DB API Endpoint",
             info="API endpoint URL for the Astra DB service.",
             value="ASTRA_DB_API_ENDPOINT",
             required=True,
@@ -49,6 +49,7 @@ class AstraDBChatMemory(LCChatMemoryComponent):
 
     def build_message_history(self) -> Memory:
         try:
+            from astrapy.admin import parse_api_endpoint
             from langchain_astradb.chat_message_histories import AstraDBChatMessageHistory
 
         except ImportError as e:
lfx/components/datastax/astra_vectorize.py
CHANGED
@@ -6,15 +6,13 @@ from lfx.template.field.base import Output
 
 
 class AstraVectorizeComponent(Component):
-    display_name: str = "Astra Vectorize
-    description: str =
-        "Configuration options for Astra Vectorize server-side embeddings. "
-        "This component is deprecated. Please use the Astra DB Component directly."
-    )
+    display_name: str = "Astra Vectorize"
+    description: str = "Configuration options for Astra Vectorize server-side embeddings. "
     documentation: str = "https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html"
     legacy = True
     icon = "AstraDB"
     name = "AstraVectorize"
+    replacement = ["datastax.AstraDB"]
 
     VECTORIZE_PROVIDERS_MAPPING = {
         "Azure OpenAI": ["azureOpenAI", ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"]],
lfx/components/datastax/astradb_tool.py
CHANGED
@@ -3,7 +3,6 @@ from datetime import datetime, timezone
 from typing import Any
 
 from astrapy import Collection, DataAPIClient, Database
-from astrapy.admin import parse_api_endpoint
 from langchain_core.tools import StructuredTool, Tool
 from pydantic import BaseModel, Field, create_model
 
@@ -192,6 +191,11 @@ class AstraDBToolComponent(LCToolComponent):
     _cached_collection: Collection | None = None
 
     def _build_collection(self):
+        try:
+            from astrapy.admin import parse_api_endpoint
+        except ImportError as e:
+            msg = "Could not import Astra DB integration package. Please install it with `uv pip install astrapy`."
+            raise ImportError(msg) from e
         if self._cached_collection:
             return self._cached_collection
 
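This mirrors the change to `astra_db.py` above: top-level `astrapy` imports move inside the methods that need them, so the module can be imported without the optional dependency installed. A generic sketch of the pattern (the function name is ours, not the component's):

def parse_endpoint_lazily(api_endpoint: str):
    # Deferred import: only fails at call time, with an actionable message.
    try:
        from astrapy.admin import parse_api_endpoint
    except ImportError as e:
        msg = (
            "Could not import Astra DB integration package. "
            "Please install it with `uv pip install astrapy`."
        )
        raise ImportError(msg) from e
    return parse_api_endpoint(api_endpoint)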