lfx-nightly 0.2.0.dev41__py3-none-any.whl → 0.3.0.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lfx/__main__.py +137 -6
- lfx/_assets/component_index.json +1 -1
- lfx/base/agents/agent.py +10 -6
- lfx/base/agents/altk_base_agent.py +5 -3
- lfx/base/agents/altk_tool_wrappers.py +1 -1
- lfx/base/agents/events.py +1 -1
- lfx/base/agents/utils.py +4 -0
- lfx/base/composio/composio_base.py +78 -41
- lfx/base/data/cloud_storage_utils.py +156 -0
- lfx/base/data/docling_utils.py +130 -55
- lfx/base/datastax/astradb_base.py +75 -64
- lfx/base/embeddings/embeddings_class.py +113 -0
- lfx/base/models/__init__.py +11 -1
- lfx/base/models/google_generative_ai_constants.py +33 -9
- lfx/base/models/model_metadata.py +6 -0
- lfx/base/models/ollama_constants.py +196 -30
- lfx/base/models/openai_constants.py +37 -10
- lfx/base/models/unified_models.py +1123 -0
- lfx/base/models/watsonx_constants.py +43 -4
- lfx/base/prompts/api_utils.py +40 -5
- lfx/base/tools/component_tool.py +2 -9
- lfx/cli/__init__.py +10 -2
- lfx/cli/commands.py +3 -0
- lfx/cli/run.py +65 -409
- lfx/cli/script_loader.py +18 -7
- lfx/cli/validation.py +6 -3
- lfx/components/__init__.py +0 -3
- lfx/components/composio/github_composio.py +1 -1
- lfx/components/cuga/cuga_agent.py +39 -27
- lfx/components/data_source/api_request.py +4 -2
- lfx/components/datastax/astradb_assistant_manager.py +4 -2
- lfx/components/docling/__init__.py +45 -11
- lfx/components/docling/docling_inline.py +39 -49
- lfx/components/docling/docling_remote.py +1 -0
- lfx/components/elastic/opensearch_multimodal.py +1733 -0
- lfx/components/files_and_knowledge/file.py +384 -36
- lfx/components/files_and_knowledge/ingestion.py +8 -0
- lfx/components/files_and_knowledge/retrieval.py +10 -0
- lfx/components/files_and_knowledge/save_file.py +91 -88
- lfx/components/langchain_utilities/ibm_granite_handler.py +211 -0
- lfx/components/langchain_utilities/tool_calling.py +37 -6
- lfx/components/llm_operations/batch_run.py +64 -18
- lfx/components/llm_operations/lambda_filter.py +213 -101
- lfx/components/llm_operations/llm_conditional_router.py +39 -7
- lfx/components/llm_operations/structured_output.py +38 -12
- lfx/components/models/__init__.py +16 -74
- lfx/components/models_and_agents/agent.py +51 -203
- lfx/components/models_and_agents/embedding_model.py +171 -255
- lfx/components/models_and_agents/language_model.py +54 -318
- lfx/components/models_and_agents/mcp_component.py +96 -10
- lfx/components/models_and_agents/prompt.py +105 -18
- lfx/components/ollama/ollama_embeddings.py +111 -29
- lfx/components/openai/openai_chat_model.py +1 -1
- lfx/components/processing/text_operations.py +580 -0
- lfx/components/vllm/__init__.py +37 -0
- lfx/components/vllm/vllm.py +141 -0
- lfx/components/vllm/vllm_embeddings.py +110 -0
- lfx/custom/custom_component/component.py +65 -10
- lfx/custom/custom_component/custom_component.py +8 -6
- lfx/events/observability/__init__.py +0 -0
- lfx/events/observability/lifecycle_events.py +111 -0
- lfx/field_typing/__init__.py +57 -58
- lfx/graph/graph/base.py +40 -1
- lfx/graph/utils.py +109 -30
- lfx/graph/vertex/base.py +75 -23
- lfx/graph/vertex/vertex_types.py +0 -5
- lfx/inputs/__init__.py +2 -0
- lfx/inputs/input_mixin.py +55 -0
- lfx/inputs/inputs.py +120 -0
- lfx/interface/components.py +24 -7
- lfx/interface/initialize/loading.py +42 -12
- lfx/io/__init__.py +2 -0
- lfx/run/__init__.py +5 -0
- lfx/run/base.py +464 -0
- lfx/schema/__init__.py +50 -0
- lfx/schema/data.py +1 -1
- lfx/schema/image.py +26 -7
- lfx/schema/message.py +104 -11
- lfx/schema/workflow.py +171 -0
- lfx/services/deps.py +12 -0
- lfx/services/interfaces.py +43 -1
- lfx/services/mcp_composer/service.py +7 -1
- lfx/services/schema.py +1 -0
- lfx/services/settings/auth.py +95 -4
- lfx/services/settings/base.py +11 -1
- lfx/services/settings/constants.py +2 -0
- lfx/services/settings/utils.py +82 -0
- lfx/services/storage/local.py +13 -8
- lfx/services/transaction/__init__.py +5 -0
- lfx/services/transaction/service.py +35 -0
- lfx/tests/unit/components/__init__.py +0 -0
- lfx/utils/constants.py +2 -0
- lfx/utils/mustache_security.py +79 -0
- lfx/utils/validate_cloud.py +81 -3
- {lfx_nightly-0.2.0.dev41.dist-info → lfx_nightly-0.3.0.dev3.dist-info}/METADATA +7 -2
- {lfx_nightly-0.2.0.dev41.dist-info → lfx_nightly-0.3.0.dev3.dist-info}/RECORD +98 -80
- {lfx_nightly-0.2.0.dev41.dist-info → lfx_nightly-0.3.0.dev3.dist-info}/WHEEL +0 -0
- {lfx_nightly-0.2.0.dev41.dist-info → lfx_nightly-0.3.0.dev3.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from collections.abc import AsyncIterator, Iterator
|
|
3
3
|
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
4
5
|
|
|
5
6
|
import orjson
|
|
6
7
|
import pandas as pd
|
|
@@ -13,6 +14,15 @@ from lfx.io import BoolInput, DropdownInput, HandleInput, SecretStrInput, StrInp
|
|
|
13
14
|
from lfx.schema import Data, DataFrame, Message
|
|
14
15
|
from lfx.services.deps import get_settings_service, get_storage_service, session_scope
|
|
15
16
|
from lfx.template.field.base import Output
|
|
17
|
+
from lfx.utils.validate_cloud import is_astra_cloud_environment
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _get_storage_location_options():
    """Get storage location options, filtering out Local if in Astra cloud environment."""
    cloud_options = [{"name": "AWS", "icon": "Amazon"}, {"name": "Google Drive", "icon": "google"}]
    if is_astra_cloud_environment():
        # Local filesystem writes are not offered in Astra cloud deployments.
        return cloud_options
    return [{"name": "Local", "icon": "hard-drive"}, *cloud_options]
|
|
16
26
|
|
|
17
27
|
|
|
18
28
|
class SaveToFileComponent(Component):
|
|
@@ -49,11 +59,7 @@ class SaveToFileComponent(Component):
|
|
|
49
59
|
display_name="Storage Location",
|
|
50
60
|
placeholder="Select Location",
|
|
51
61
|
info="Choose where to save the file.",
|
|
52
|
-
options=
|
|
53
|
-
{"name": "Local", "icon": "hard-drive"},
|
|
54
|
-
{"name": "AWS", "icon": "Amazon"},
|
|
55
|
-
{"name": "Google Drive", "icon": "google"},
|
|
56
|
-
],
|
|
62
|
+
options=_get_storage_location_options(),
|
|
57
63
|
real_time_refresh=True,
|
|
58
64
|
limit=1,
|
|
59
65
|
),
|
|
@@ -116,6 +122,7 @@ class SaveToFileComponent(Component):
|
|
|
116
122
|
info="AWS Access key ID.",
|
|
117
123
|
show=False,
|
|
118
124
|
advanced=True,
|
|
125
|
+
required=True,
|
|
119
126
|
),
|
|
120
127
|
SecretStrInput(
|
|
121
128
|
name="aws_secret_access_key",
|
|
@@ -123,6 +130,7 @@ class SaveToFileComponent(Component):
|
|
|
123
130
|
info="AWS Secret Key.",
|
|
124
131
|
show=False,
|
|
125
132
|
advanced=True,
|
|
133
|
+
required=True,
|
|
126
134
|
),
|
|
127
135
|
StrInput(
|
|
128
136
|
name="bucket_name",
|
|
@@ -130,6 +138,7 @@ class SaveToFileComponent(Component):
|
|
|
130
138
|
info="Enter the name of the S3 bucket.",
|
|
131
139
|
show=False,
|
|
132
140
|
advanced=True,
|
|
141
|
+
required=True,
|
|
133
142
|
),
|
|
134
143
|
StrInput(
|
|
135
144
|
name="aws_region",
|
|
@@ -152,6 +161,7 @@ class SaveToFileComponent(Component):
|
|
|
152
161
|
info="Your Google Cloud Platform service account JSON key as a secret string (complete JSON content).",
|
|
153
162
|
show=False,
|
|
154
163
|
advanced=True,
|
|
164
|
+
required=True,
|
|
155
165
|
),
|
|
156
166
|
StrInput(
|
|
157
167
|
name="folder_id",
|
|
@@ -170,6 +180,12 @@ class SaveToFileComponent(Component):
|
|
|
170
180
|
|
|
171
181
|
def update_build_config(self, build_config, field_value, field_name=None):
|
|
172
182
|
"""Update build configuration to show/hide fields based on storage location selection."""
|
|
183
|
+
# Update options dynamically based on cloud environment
|
|
184
|
+
# This ensures options are refreshed when build_config is updated
|
|
185
|
+
if "storage_location" in build_config:
|
|
186
|
+
updated_options = _get_storage_location_options()
|
|
187
|
+
build_config["storage_location"]["options"] = updated_options
|
|
188
|
+
|
|
173
189
|
if field_name != "storage_location":
|
|
174
190
|
return build_config
|
|
175
191
|
|
|
@@ -224,12 +240,14 @@ class SaveToFileComponent(Component):
|
|
|
224
240
|
for f_name in aws_fields:
|
|
225
241
|
if f_name in build_config:
|
|
226
242
|
build_config[f_name]["show"] = True
|
|
243
|
+
build_config[f_name]["advanced"] = False
|
|
227
244
|
|
|
228
245
|
elif location == "Google Drive":
|
|
229
246
|
gdrive_fields = ["gdrive_format", "service_account_key", "folder_id"]
|
|
230
247
|
for f_name in gdrive_fields:
|
|
231
248
|
if f_name in build_config:
|
|
232
249
|
build_config[f_name]["show"] = True
|
|
250
|
+
build_config[f_name]["advanced"] = False
|
|
233
251
|
|
|
234
252
|
return build_config
|
|
235
253
|
|
|
@@ -249,6 +267,11 @@ class SaveToFileComponent(Component):
|
|
|
249
267
|
msg = "Storage location must be selected."
|
|
250
268
|
raise ValueError(msg)
|
|
251
269
|
|
|
270
|
+
# Check if Local storage is disabled in cloud environment
|
|
271
|
+
if storage_location == "Local" and is_astra_cloud_environment():
|
|
272
|
+
msg = "Local storage is not available in cloud environment. Please use AWS or Google Drive."
|
|
273
|
+
raise ValueError(msg)
|
|
274
|
+
|
|
252
275
|
# Route to appropriate save method based on storage location
|
|
253
276
|
if storage_location == "Local":
|
|
254
277
|
return await self._save_to_local()
|
|
@@ -540,32 +563,67 @@ class SaveToFileComponent(Component):
|
|
|
540
563
|
|
|
541
564
|
async def _save_to_aws(self) -> Message:
|
|
542
565
|
"""Save file to AWS S3 using S3 functionality."""
|
|
566
|
+
import os
|
|
567
|
+
|
|
568
|
+
import boto3
|
|
569
|
+
|
|
570
|
+
from lfx.base.data.cloud_storage_utils import create_s3_client, validate_aws_credentials
|
|
571
|
+
|
|
572
|
+
# Get AWS credentials from component inputs or fall back to environment variables
|
|
573
|
+
aws_access_key_id = getattr(self, "aws_access_key_id", None)
|
|
574
|
+
if aws_access_key_id and hasattr(aws_access_key_id, "get_secret_value"):
|
|
575
|
+
aws_access_key_id = aws_access_key_id.get_secret_value()
|
|
576
|
+
if not aws_access_key_id:
|
|
577
|
+
aws_access_key_id = os.getenv("AWS_ACCESS_KEY_ID")
|
|
578
|
+
|
|
579
|
+
aws_secret_access_key = getattr(self, "aws_secret_access_key", None)
|
|
580
|
+
if aws_secret_access_key and hasattr(aws_secret_access_key, "get_secret_value"):
|
|
581
|
+
aws_secret_access_key = aws_secret_access_key.get_secret_value()
|
|
582
|
+
if not aws_secret_access_key:
|
|
583
|
+
aws_secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")
|
|
584
|
+
|
|
585
|
+
bucket_name = getattr(self, "bucket_name", None)
|
|
586
|
+
if not bucket_name:
|
|
587
|
+
# Try to get from storage service settings
|
|
588
|
+
settings = get_settings_service().settings
|
|
589
|
+
bucket_name = settings.object_storage_bucket_name
|
|
590
|
+
|
|
543
591
|
# Validate AWS credentials
|
|
544
|
-
if not
|
|
545
|
-
msg =
|
|
592
|
+
if not aws_access_key_id:
|
|
593
|
+
msg = (
|
|
594
|
+
"AWS Access Key ID is required for S3 storage. Provide it as a component input "
|
|
595
|
+
"or set AWS_ACCESS_KEY_ID environment variable."
|
|
596
|
+
)
|
|
546
597
|
raise ValueError(msg)
|
|
547
|
-
if not
|
|
548
|
-
msg =
|
|
598
|
+
if not aws_secret_access_key:
|
|
599
|
+
msg = (
|
|
600
|
+
"AWS Secret Key is required for S3 storage. Provide it as a component input "
|
|
601
|
+
"or set AWS_SECRET_ACCESS_KEY environment variable."
|
|
602
|
+
)
|
|
549
603
|
raise ValueError(msg)
|
|
550
|
-
if not
|
|
551
|
-
msg =
|
|
604
|
+
if not bucket_name:
|
|
605
|
+
msg = (
|
|
606
|
+
"S3 Bucket Name is required for S3 storage. Provide it as a component input "
|
|
607
|
+
"or set LANGFLOW_OBJECT_STORAGE_BUCKET_NAME environment variable."
|
|
608
|
+
)
|
|
552
609
|
raise ValueError(msg)
|
|
553
610
|
|
|
554
|
-
#
|
|
555
|
-
|
|
556
|
-
import boto3
|
|
557
|
-
except ImportError as e:
|
|
558
|
-
msg = "boto3 is not installed. Please install it using `uv pip install boto3`."
|
|
559
|
-
raise ImportError(msg) from e
|
|
611
|
+
# Validate AWS credentials
|
|
612
|
+
validate_aws_credentials(self)
|
|
560
613
|
|
|
561
614
|
# Create S3 client
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
"
|
|
615
|
+
s3_client = create_s3_client(self)
|
|
616
|
+
client_config: dict[str, Any] = {
|
|
617
|
+
"aws_access_key_id": str(aws_access_key_id),
|
|
618
|
+
"aws_secret_access_key": str(aws_secret_access_key),
|
|
565
619
|
}
|
|
566
620
|
|
|
567
|
-
|
|
568
|
-
|
|
621
|
+
# Get region from component input, environment variable, or settings
|
|
622
|
+
aws_region = getattr(self, "aws_region", None)
|
|
623
|
+
if not aws_region:
|
|
624
|
+
aws_region = os.getenv("AWS_DEFAULT_REGION") or os.getenv("AWS_REGION")
|
|
625
|
+
if aws_region:
|
|
626
|
+
client_config["region_name"] = str(aws_region)
|
|
569
627
|
|
|
570
628
|
s3_client = boto3.client("s3", **client_config)
|
|
571
629
|
|
|
@@ -589,8 +647,8 @@ class SaveToFileComponent(Component):
|
|
|
589
647
|
|
|
590
648
|
try:
|
|
591
649
|
# Upload to S3
|
|
592
|
-
s3_client.upload_file(temp_file_path,
|
|
593
|
-
s3_url = f"s3://{
|
|
650
|
+
s3_client.upload_file(temp_file_path, bucket_name, file_path)
|
|
651
|
+
s3_url = f"s3://{bucket_name}/{file_path}"
|
|
594
652
|
return Message(text=f"File successfully uploaded to {s3_url}")
|
|
595
653
|
finally:
|
|
596
654
|
# Clean up temp file
|
|
@@ -599,6 +657,12 @@ class SaveToFileComponent(Component):
|
|
|
599
657
|
|
|
600
658
|
async def _save_to_google_drive(self) -> Message:
|
|
601
659
|
"""Save file to Google Drive using Google Drive functionality."""
|
|
660
|
+
import tempfile
|
|
661
|
+
|
|
662
|
+
from googleapiclient.http import MediaFileUpload
|
|
663
|
+
|
|
664
|
+
from lfx.base.data.cloud_storage_utils import create_google_drive_service
|
|
665
|
+
|
|
602
666
|
# Validate Google Drive credentials
|
|
603
667
|
if not getattr(self, "service_account_key", None):
|
|
604
668
|
msg = "GCP Credentials Secret Key is required for Google Drive storage"
|
|
@@ -607,71 +671,10 @@ class SaveToFileComponent(Component):
|
|
|
607
671
|
msg = "Google Drive Folder ID is required for Google Drive storage"
|
|
608
672
|
raise ValueError(msg)
|
|
609
673
|
|
|
610
|
-
#
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
import tempfile
|
|
614
|
-
|
|
615
|
-
from google.oauth2 import service_account
|
|
616
|
-
from googleapiclient.discovery import build
|
|
617
|
-
from googleapiclient.http import MediaFileUpload
|
|
618
|
-
except ImportError as e:
|
|
619
|
-
msg = "Google API client libraries are not installed. Please install them."
|
|
620
|
-
raise ImportError(msg) from e
|
|
621
|
-
|
|
622
|
-
# Parse credentials with multiple fallback strategies
|
|
623
|
-
credentials_dict = None
|
|
624
|
-
parse_errors = []
|
|
625
|
-
|
|
626
|
-
# Strategy 1: Parse as-is with strict=False to allow control characters
|
|
627
|
-
try:
|
|
628
|
-
credentials_dict = json.loads(self.service_account_key, strict=False)
|
|
629
|
-
except json.JSONDecodeError as e:
|
|
630
|
-
parse_errors.append(f"Standard parse: {e!s}")
|
|
631
|
-
|
|
632
|
-
# Strategy 2: Strip whitespace and try again
|
|
633
|
-
if credentials_dict is None:
|
|
634
|
-
try:
|
|
635
|
-
cleaned_key = self.service_account_key.strip()
|
|
636
|
-
credentials_dict = json.loads(cleaned_key, strict=False)
|
|
637
|
-
except json.JSONDecodeError as e:
|
|
638
|
-
parse_errors.append(f"Stripped parse: {e!s}")
|
|
639
|
-
|
|
640
|
-
# Strategy 3: Check if it's double-encoded (JSON string of a JSON string)
|
|
641
|
-
if credentials_dict is None:
|
|
642
|
-
try:
|
|
643
|
-
decoded_once = json.loads(self.service_account_key, strict=False)
|
|
644
|
-
if isinstance(decoded_once, str):
|
|
645
|
-
credentials_dict = json.loads(decoded_once, strict=False)
|
|
646
|
-
else:
|
|
647
|
-
credentials_dict = decoded_once
|
|
648
|
-
except json.JSONDecodeError as e:
|
|
649
|
-
parse_errors.append(f"Double-encoded parse: {e!s}")
|
|
650
|
-
|
|
651
|
-
# Strategy 4: Try to fix common issues with newlines in the private_key field
|
|
652
|
-
if credentials_dict is None:
|
|
653
|
-
try:
|
|
654
|
-
# Replace literal \n with actual newlines which is common in pasted JSON
|
|
655
|
-
fixed_key = self.service_account_key.replace("\\n", "\n")
|
|
656
|
-
credentials_dict = json.loads(fixed_key, strict=False)
|
|
657
|
-
except json.JSONDecodeError as e:
|
|
658
|
-
parse_errors.append(f"Newline-fixed parse: {e!s}")
|
|
659
|
-
|
|
660
|
-
if credentials_dict is None:
|
|
661
|
-
error_details = "; ".join(parse_errors)
|
|
662
|
-
msg = (
|
|
663
|
-
f"Unable to parse service account key JSON. Tried multiple strategies: {error_details}. "
|
|
664
|
-
"Please ensure you've copied the entire JSON content from your service account key file. "
|
|
665
|
-
"The JSON should start with '{' and contain fields like 'type', 'project_id', 'private_key', etc."
|
|
666
|
-
)
|
|
667
|
-
raise ValueError(msg)
|
|
668
|
-
|
|
669
|
-
# Create Google Drive service with appropriate scopes
|
|
670
|
-
# Use drive scope for folder access, file scope is too restrictive for folder verification
|
|
671
|
-
credentials = service_account.Credentials.from_service_account_info(
|
|
672
|
-
credentials_dict, scopes=["https://www.googleapis.com/auth/drive"]
|
|
674
|
+
# Create Google Drive service with full drive scope (needed for folder operations)
|
|
675
|
+
drive_service, credentials = create_google_drive_service(
|
|
676
|
+
self.service_account_key, scopes=["https://www.googleapis.com/auth/drive"], return_credentials=True
|
|
673
677
|
)
|
|
674
|
-
drive_service = build("drive", "v3", credentials=credentials)
|
|
675
678
|
|
|
676
679
|
# Extract content and format
|
|
677
680
|
content = self._extract_content_for_upload()
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
"""IBM WatsonX-specific tool calling logic.
|
|
2
|
+
|
|
3
|
+
This module contains all the specialized handling for IBM WatsonX models
|
|
4
|
+
which have different tool calling behavior compared to other LLMs.
|
|
5
|
+
|
|
6
|
+
The tool calling issues affect ALL models on the WatsonX platform,
|
|
7
|
+
not just Granite models. This includes:
|
|
8
|
+
- meta-llama models
|
|
9
|
+
- mistral models
|
|
10
|
+
- granite models
|
|
11
|
+
- any other model running through WatsonX
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import re
|
|
15
|
+
|
|
16
|
+
from langchain.agents.format_scratchpad.tools import format_to_tool_messages
|
|
17
|
+
from langchain.agents.output_parsers.tools import ToolsAgentOutputParser
|
|
18
|
+
from langchain_core.prompts import ChatPromptTemplate
|
|
19
|
+
from langchain_core.runnables import RunnableLambda
|
|
20
|
+
|
|
21
|
+
from lfx.log.logger import logger
|
|
22
|
+
|
|
23
|
+
# Pattern to detect placeholder usage in tool arguments
# (e.g. a model emitting "<result-from-search-tool>" verbatim instead of a real value).
# Matches any angle-bracketed token containing one of the listed keywords, case-insensitively.
PLACEHOLDER_PATTERN = re.compile(
    r"<[^>]*(?:result|value|output|response|data|from|extract|previous|current|date|input|query|search|tool)[^>]*>",
    re.IGNORECASE,
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def is_watsonx_model(llm) -> bool:
    """Check if the LLM is an IBM WatsonX model (any model, not just Granite).

    This detects the provider (WatsonX) rather than a specific model,
    since tool calling issues affect all models on the WatsonX platform.
    """
    llm_type = type(llm)

    # Primary signal: the class name itself (e.g. ChatWatsonx).
    if "watsonx" in llm_type.__name__.lower():
        return True

    # Secondary signal: the defining module (e.g. langchain_ibm).
    module = getattr(llm_type, "__module__", "").lower()
    return "watsonx" in module or "langchain_ibm" in module
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def is_granite_model(llm) -> bool:
    """Check if the LLM is an IBM Granite model.

    DEPRECATED: Use is_watsonx_model() instead.
    Kept for backwards compatibility.
    """
    # Prefer model_id; fall back to model_name only when model_id is absent.
    if hasattr(llm, "model_id"):
        identifier = llm.model_id
    else:
        identifier = getattr(llm, "model_name", "")
    return "granite" in str(identifier).lower()
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _get_tool_schema_description(tool) -> str:
|
|
57
|
+
"""Extract a brief description of the tool's expected parameters.
|
|
58
|
+
|
|
59
|
+
Returns empty string if schema extraction fails (graceful degradation).
|
|
60
|
+
"""
|
|
61
|
+
if not hasattr(tool, "args_schema") or not tool.args_schema:
|
|
62
|
+
return ""
|
|
63
|
+
|
|
64
|
+
schema = tool.args_schema
|
|
65
|
+
if not hasattr(schema, "model_fields"):
|
|
66
|
+
return ""
|
|
67
|
+
|
|
68
|
+
try:
|
|
69
|
+
fields = schema.model_fields
|
|
70
|
+
params = []
|
|
71
|
+
for name, field in fields.items():
|
|
72
|
+
required = field.is_required() if hasattr(field, "is_required") else True
|
|
73
|
+
req_str = "(required)" if required else "(optional)"
|
|
74
|
+
params.append(f"{name} {req_str}")
|
|
75
|
+
return f"Parameters: {', '.join(params)}" if params else ""
|
|
76
|
+
except (AttributeError, TypeError) as e:
|
|
77
|
+
logger.debug(f"Could not extract schema for tool {getattr(tool, 'name', 'unknown')}: {e}")
|
|
78
|
+
return ""
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def get_enhanced_system_prompt(base_prompt: str, tools: list) -> str:
    """Enhance system prompt for WatsonX models with tool usage instructions."""
    # Single-tool or no-tool agents don't need tool-disambiguation guidance.
    if not tools or len(tools) <= 1:
        return base_prompt

    # Build detailed tool descriptions with their parameters.
    described = []
    for tool in tools:
        schema_desc = _get_tool_schema_description(tool)
        described.append(f"- {tool.name}: {schema_desc}" if schema_desc else f"- {tool.name}")

    tools_section = "\n".join(described)

    # Note: "one tool at a time" is a WatsonX platform limitation, not a design choice.
    # WatsonX models don't reliably support parallel tool calls.
    enhancement = f"""

TOOL USAGE GUIDELINES:

1. ALWAYS call tools when you need information - never say "I cannot" or "I don't have access".
2. Call one tool at a time, then use its result before calling another tool.
3. Use ACTUAL values in tool arguments - never use placeholder syntax like <result-from-...>.
4. Each tool has specific parameters - use the correct ones for each tool.

AVAILABLE TOOLS:
{tools_section}"""

    return base_prompt + enhancement
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def detect_placeholder_in_args(tool_calls: list) -> tuple[bool, str | None]:
|
|
115
|
+
"""Detect if any tool call contains placeholder syntax in its arguments."""
|
|
116
|
+
if not tool_calls:
|
|
117
|
+
return False, None
|
|
118
|
+
|
|
119
|
+
for tool_call in tool_calls:
|
|
120
|
+
args = tool_call.get("args", {})
|
|
121
|
+
if isinstance(args, dict):
|
|
122
|
+
for key, value in args.items():
|
|
123
|
+
if isinstance(value, str) and PLACEHOLDER_PATTERN.search(value):
|
|
124
|
+
tool_name = tool_call.get("name", "unknown")
|
|
125
|
+
logger.warning(f"[IBM WatsonX] Detected placeholder: {tool_name}.{key}={value}")
|
|
126
|
+
return True, value
|
|
127
|
+
elif isinstance(args, str) and PLACEHOLDER_PATTERN.search(args):
|
|
128
|
+
logger.warning(f"[IBM WatsonX] Detected placeholder in args: {args}")
|
|
129
|
+
return True, args
|
|
130
|
+
return False, None
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _limit_to_single_tool_call(llm_response):
|
|
134
|
+
"""Limit response to single tool call (WatsonX platform limitation)."""
|
|
135
|
+
if not hasattr(llm_response, "tool_calls") or not llm_response.tool_calls:
|
|
136
|
+
return llm_response
|
|
137
|
+
|
|
138
|
+
if len(llm_response.tool_calls) > 1:
|
|
139
|
+
logger.debug(f"[WatsonX] Limiting {len(llm_response.tool_calls)} tool calls to 1")
|
|
140
|
+
llm_response.tool_calls = [llm_response.tool_calls[0]]
|
|
141
|
+
|
|
142
|
+
return llm_response
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _handle_placeholder_in_response(llm_response, messages, llm_auto):
|
|
146
|
+
"""Re-invoke with corrective message if placeholder syntax detected."""
|
|
147
|
+
if not hasattr(llm_response, "tool_calls") or not llm_response.tool_calls:
|
|
148
|
+
return llm_response
|
|
149
|
+
|
|
150
|
+
has_placeholder, _ = detect_placeholder_in_args(llm_response.tool_calls)
|
|
151
|
+
if not has_placeholder:
|
|
152
|
+
return llm_response
|
|
153
|
+
|
|
154
|
+
logger.warning("[WatsonX] Placeholder detected, requesting actual values")
|
|
155
|
+
from langchain_core.messages import SystemMessage
|
|
156
|
+
|
|
157
|
+
corrective_msg = SystemMessage(
|
|
158
|
+
content="Provide your final answer using the actual values from previous tool results."
|
|
159
|
+
)
|
|
160
|
+
messages_list = list(messages.messages) if hasattr(messages, "messages") else list(messages)
|
|
161
|
+
messages_list.append(corrective_msg)
|
|
162
|
+
return llm_auto.invoke(messages_list)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def create_granite_agent(llm, tools: list, prompt: ChatPromptTemplate, forced_iterations: int = 2):
    """Create a tool calling agent for IBM WatsonX/Granite models.

    Why this exists: WatsonX models have platform-specific tool calling behavior:
    - With tool_choice='auto': Models often describe tools in text instead of calling them
    - With tool_choice='required': Models can't provide final answers (causes infinite loops)
    - Models only reliably support single tool calls per turn

    Solution: Dynamic switching between 'required' (to force tool use) and 'auto' (to allow answers).

    Args:
        llm: WatsonX language model instance
        tools: Available tools for the agent
        prompt: Chat prompt template
        forced_iterations: Iterations to force tool_choice='required' before allowing 'auto'

    Returns:
        Runnable agent chain compatible with AgentExecutor
    """
    if not hasattr(llm, "bind_tools"):
        msg = "WatsonX handler requires a language model with bind_tools support."
        raise ValueError(msg)

    # Pre-bind both variants once; the closure picks per-step below.
    bound = {
        "required": llm.bind_tools(tools or [], tool_choice="required"),
        "auto": llm.bind_tools(tools or [], tool_choice="auto"),
    }

    def invoke(inputs: dict):
        steps = inputs.get("intermediate_steps", [])
        scratchpad = format_to_tool_messages(steps)
        messages = prompt.invoke({**inputs, "agent_scratchpad": scratchpad})

        # Use 'required' for first N iterations, then 'auto' to allow final answers
        choice = "required" if len(steps) < forced_iterations else "auto"
        logger.debug(f"[WatsonX] Step {len(steps) + 1}, tool_choice={choice}")

        response = bound[choice].invoke(messages)
        response = _limit_to_single_tool_call(response)
        return _handle_placeholder_in_response(response, messages, bound["auto"])

    return RunnableLambda(invoke) | ToolsAgentOutputParser()


# Alias for backwards compatibility
create_watsonx_agent = create_granite_agent
|
|
@@ -2,6 +2,13 @@ from langchain.agents import create_tool_calling_agent
|
|
|
2
2
|
from langchain_core.prompts import ChatPromptTemplate
|
|
3
3
|
|
|
4
4
|
from lfx.base.agents.agent import LCToolsAgentComponent
|
|
5
|
+
|
|
6
|
+
# IBM Granite-specific logic is in a separate file
|
|
7
|
+
from lfx.components.langchain_utilities.ibm_granite_handler import (
|
|
8
|
+
create_granite_agent,
|
|
9
|
+
get_enhanced_system_prompt,
|
|
10
|
+
is_granite_model,
|
|
11
|
+
)
|
|
5
12
|
from lfx.inputs.inputs import (
|
|
6
13
|
DataInput,
|
|
7
14
|
HandleInput,
|
|
@@ -44,15 +51,39 @@ class ToolCallingAgentComponent(LCToolsAgentComponent):
|
|
|
44
51
|
return self.chat_history
|
|
45
52
|
|
|
46
53
|
def create_agent_runnable(self):
|
|
47
|
-
messages = [
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
54
|
+
messages = []
|
|
55
|
+
|
|
56
|
+
# Use local variable to avoid mutating component state on repeated calls
|
|
57
|
+
effective_system_prompt = self.system_prompt or ""
|
|
58
|
+
|
|
59
|
+
# Enhance prompt for IBM Granite models (they need explicit tool usage instructions)
|
|
60
|
+
if is_granite_model(self.llm) and self.tools:
|
|
61
|
+
effective_system_prompt = get_enhanced_system_prompt(effective_system_prompt, self.tools)
|
|
62
|
+
# Store enhanced prompt for use in agent.py without mutating original
|
|
63
|
+
self._effective_system_prompt = effective_system_prompt
|
|
64
|
+
|
|
65
|
+
# Only include system message if system_prompt is provided and not empty
|
|
66
|
+
if effective_system_prompt.strip():
|
|
67
|
+
messages.append(("system", "{system_prompt}"))
|
|
68
|
+
|
|
69
|
+
messages.extend(
|
|
70
|
+
[
|
|
71
|
+
("placeholder", "{chat_history}"),
|
|
72
|
+
("human", "{input}"),
|
|
73
|
+
("placeholder", "{agent_scratchpad}"),
|
|
74
|
+
]
|
|
75
|
+
)
|
|
76
|
+
|
|
53
77
|
prompt = ChatPromptTemplate.from_messages(messages)
|
|
54
78
|
self.validate_tool_names()
|
|
79
|
+
|
|
55
80
|
try:
|
|
81
|
+
# Use IBM Granite-specific agent if detected
|
|
82
|
+
# Other WatsonX models (Llama, Mistral, etc.) use default behavior
|
|
83
|
+
if is_granite_model(self.llm) and self.tools:
|
|
84
|
+
return create_granite_agent(self.llm, self.tools, prompt)
|
|
85
|
+
|
|
86
|
+
# Default behavior for other models (including non-Granite WatsonX models)
|
|
56
87
|
return create_tool_calling_agent(self.llm, self.tools or [], prompt)
|
|
57
88
|
except NotImplementedError as e:
|
|
58
89
|
message = f"{self.display_name} does not support tool calling. Please try using a compatible model."
|
|
@@ -4,8 +4,13 @@ from typing import TYPE_CHECKING, Any, cast
|
|
|
4
4
|
|
|
5
5
|
import toml # type: ignore[import-untyped]
|
|
6
6
|
|
|
7
|
+
from lfx.base.models.unified_models import (
|
|
8
|
+
get_language_model_options,
|
|
9
|
+
get_model_classes,
|
|
10
|
+
update_model_options_in_build_config,
|
|
11
|
+
)
|
|
7
12
|
from lfx.custom.custom_component.component import Component
|
|
8
|
-
from lfx.io import BoolInput, DataFrameInput,
|
|
13
|
+
from lfx.io import BoolInput, DataFrameInput, MessageTextInput, ModelInput, MultilineInput, Output, SecretStrInput
|
|
9
14
|
from lfx.log.logger import logger
|
|
10
15
|
from lfx.schema.dataframe import DataFrame
|
|
11
16
|
|
|
@@ -20,13 +25,20 @@ class BatchRunComponent(Component):
|
|
|
20
25
|
icon = "List"
|
|
21
26
|
|
|
22
27
|
inputs = [
|
|
23
|
-
|
|
28
|
+
ModelInput(
|
|
24
29
|
name="model",
|
|
25
30
|
display_name="Language Model",
|
|
26
|
-
info="
|
|
27
|
-
|
|
31
|
+
info="Select your model provider",
|
|
32
|
+
real_time_refresh=True,
|
|
28
33
|
required=True,
|
|
29
34
|
),
|
|
35
|
+
SecretStrInput(
|
|
36
|
+
name="api_key",
|
|
37
|
+
display_name="API Key",
|
|
38
|
+
info="Model Provider API key",
|
|
39
|
+
real_time_refresh=True,
|
|
40
|
+
advanced=True,
|
|
41
|
+
),
|
|
30
42
|
MultilineInput(
|
|
31
43
|
name="system_message",
|
|
32
44
|
display_name="Instructions",
|
|
@@ -76,6 +88,17 @@ class BatchRunComponent(Component):
|
|
|
76
88
|
),
|
|
77
89
|
]
|
|
78
90
|
|
|
91
|
+
def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):
    """Dynamically update build config with user-filtered model options.

    Delegates to the shared unified-models helper so the model dropdown stays in
    sync with the providers/models the user has enabled.

    Args:
        build_config: The component's current build configuration dict.
        field_value: The new value of the field that triggered the refresh.
        field_name: Name of the changed field, or None.

    Returns:
        The build configuration returned by update_model_options_in_build_config.
    """
    return update_model_options_in_build_config(
        component=self,
        build_config=build_config,
        # Cache key scopes the option list to language models (vs. embeddings).
        cache_key_prefix="language_model_options",
        get_options_func=get_language_model_options,
        field_name=field_name,
        field_value=field_value,
    )
|
|
101
|
+
|
|
79
102
|
def _format_row_as_toml(self, row: dict[str, Any]) -> str:
|
|
80
103
|
"""Convert a dictionary (row) into a TOML-formatted string."""
|
|
81
104
|
formatted_dict = {str(col): {"value": str(val)} for col, val in row.items()}
|
|
@@ -111,20 +134,43 @@ class BatchRunComponent(Component):
|
|
|
111
134
|
}
|
|
112
135
|
|
|
113
136
|
async def run_batch(self) -> DataFrame:
|
|
114
|
-
"""Process each row in df[column_name] with the language model asynchronously.
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
137
|
+
"""Process each row in df[column_name] with the language model asynchronously."""
|
|
138
|
+
# Check if model is already an instance (for testing) or needs to be instantiated
|
|
139
|
+
if isinstance(self.model, list):
|
|
140
|
+
# Extract model configuration
|
|
141
|
+
model_selection = self.model[0]
|
|
142
|
+
model_name = model_selection.get("name")
|
|
143
|
+
provider = model_selection.get("provider")
|
|
144
|
+
metadata = model_selection.get("metadata", {})
|
|
145
|
+
|
|
146
|
+
# Get model class and parameters from metadata
|
|
147
|
+
model_class = get_model_classes().get(metadata.get("model_class"))
|
|
148
|
+
if model_class is None:
|
|
149
|
+
msg = f"No model class defined for {model_name}"
|
|
150
|
+
raise ValueError(msg)
|
|
151
|
+
|
|
152
|
+
api_key_param = metadata.get("api_key_param", "api_key")
|
|
153
|
+
model_name_param = metadata.get("model_name_param", "model")
|
|
154
|
+
|
|
155
|
+
# Get API key from global variables
|
|
156
|
+
from lfx.base.models.unified_models import get_api_key_for_provider
|
|
157
|
+
|
|
158
|
+
api_key = get_api_key_for_provider(self.user_id, provider, self.api_key)
|
|
159
|
+
|
|
160
|
+
if not api_key and provider != "Ollama":
|
|
161
|
+
msg = f"{provider} API key is required. Please configure it globally."
|
|
162
|
+
raise ValueError(msg)
|
|
163
|
+
|
|
164
|
+
# Instantiate the model
|
|
165
|
+
kwargs = {
|
|
166
|
+
model_name_param: model_name,
|
|
167
|
+
api_key_param: api_key,
|
|
168
|
+
}
|
|
169
|
+
model: Runnable = model_class(**kwargs)
|
|
170
|
+
else:
|
|
171
|
+
# Model is already an instance (typically in tests)
|
|
172
|
+
model = self.model
|
|
173
|
+
|
|
128
174
|
system_msg = self.system_message or ""
|
|
129
175
|
df: DataFrame = self.df
|
|
130
176
|
col_name = self.column_name or ""
|