lfx-nightly 0.2.0.dev41__py3-none-any.whl → 0.3.0.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. lfx/__main__.py +137 -6
  2. lfx/_assets/component_index.json +1 -1
  3. lfx/base/agents/agent.py +10 -6
  4. lfx/base/agents/altk_base_agent.py +5 -3
  5. lfx/base/agents/altk_tool_wrappers.py +1 -1
  6. lfx/base/agents/events.py +1 -1
  7. lfx/base/agents/utils.py +4 -0
  8. lfx/base/composio/composio_base.py +78 -41
  9. lfx/base/data/cloud_storage_utils.py +156 -0
  10. lfx/base/data/docling_utils.py +130 -55
  11. lfx/base/datastax/astradb_base.py +75 -64
  12. lfx/base/embeddings/embeddings_class.py +113 -0
  13. lfx/base/models/__init__.py +11 -1
  14. lfx/base/models/google_generative_ai_constants.py +33 -9
  15. lfx/base/models/model_metadata.py +6 -0
  16. lfx/base/models/ollama_constants.py +196 -30
  17. lfx/base/models/openai_constants.py +37 -10
  18. lfx/base/models/unified_models.py +1123 -0
  19. lfx/base/models/watsonx_constants.py +43 -4
  20. lfx/base/prompts/api_utils.py +40 -5
  21. lfx/base/tools/component_tool.py +2 -9
  22. lfx/cli/__init__.py +10 -2
  23. lfx/cli/commands.py +3 -0
  24. lfx/cli/run.py +65 -409
  25. lfx/cli/script_loader.py +18 -7
  26. lfx/cli/validation.py +6 -3
  27. lfx/components/__init__.py +0 -3
  28. lfx/components/composio/github_composio.py +1 -1
  29. lfx/components/cuga/cuga_agent.py +39 -27
  30. lfx/components/data_source/api_request.py +4 -2
  31. lfx/components/datastax/astradb_assistant_manager.py +4 -2
  32. lfx/components/docling/__init__.py +45 -11
  33. lfx/components/docling/docling_inline.py +39 -49
  34. lfx/components/docling/docling_remote.py +1 -0
  35. lfx/components/elastic/opensearch_multimodal.py +1733 -0
  36. lfx/components/files_and_knowledge/file.py +384 -36
  37. lfx/components/files_and_knowledge/ingestion.py +8 -0
  38. lfx/components/files_and_knowledge/retrieval.py +10 -0
  39. lfx/components/files_and_knowledge/save_file.py +91 -88
  40. lfx/components/langchain_utilities/ibm_granite_handler.py +211 -0
  41. lfx/components/langchain_utilities/tool_calling.py +37 -6
  42. lfx/components/llm_operations/batch_run.py +64 -18
  43. lfx/components/llm_operations/lambda_filter.py +213 -101
  44. lfx/components/llm_operations/llm_conditional_router.py +39 -7
  45. lfx/components/llm_operations/structured_output.py +38 -12
  46. lfx/components/models/__init__.py +16 -74
  47. lfx/components/models_and_agents/agent.py +51 -203
  48. lfx/components/models_and_agents/embedding_model.py +171 -255
  49. lfx/components/models_and_agents/language_model.py +54 -318
  50. lfx/components/models_and_agents/mcp_component.py +96 -10
  51. lfx/components/models_and_agents/prompt.py +105 -18
  52. lfx/components/ollama/ollama_embeddings.py +111 -29
  53. lfx/components/openai/openai_chat_model.py +1 -1
  54. lfx/components/processing/text_operations.py +580 -0
  55. lfx/components/vllm/__init__.py +37 -0
  56. lfx/components/vllm/vllm.py +141 -0
  57. lfx/components/vllm/vllm_embeddings.py +110 -0
  58. lfx/custom/custom_component/component.py +65 -10
  59. lfx/custom/custom_component/custom_component.py +8 -6
  60. lfx/events/observability/__init__.py +0 -0
  61. lfx/events/observability/lifecycle_events.py +111 -0
  62. lfx/field_typing/__init__.py +57 -58
  63. lfx/graph/graph/base.py +40 -1
  64. lfx/graph/utils.py +109 -30
  65. lfx/graph/vertex/base.py +75 -23
  66. lfx/graph/vertex/vertex_types.py +0 -5
  67. lfx/inputs/__init__.py +2 -0
  68. lfx/inputs/input_mixin.py +55 -0
  69. lfx/inputs/inputs.py +120 -0
  70. lfx/interface/components.py +24 -7
  71. lfx/interface/initialize/loading.py +42 -12
  72. lfx/io/__init__.py +2 -0
  73. lfx/run/__init__.py +5 -0
  74. lfx/run/base.py +464 -0
  75. lfx/schema/__init__.py +50 -0
  76. lfx/schema/data.py +1 -1
  77. lfx/schema/image.py +26 -7
  78. lfx/schema/message.py +104 -11
  79. lfx/schema/workflow.py +171 -0
  80. lfx/services/deps.py +12 -0
  81. lfx/services/interfaces.py +43 -1
  82. lfx/services/mcp_composer/service.py +7 -1
  83. lfx/services/schema.py +1 -0
  84. lfx/services/settings/auth.py +95 -4
  85. lfx/services/settings/base.py +11 -1
  86. lfx/services/settings/constants.py +2 -0
  87. lfx/services/settings/utils.py +82 -0
  88. lfx/services/storage/local.py +13 -8
  89. lfx/services/transaction/__init__.py +5 -0
  90. lfx/services/transaction/service.py +35 -0
  91. lfx/tests/unit/components/__init__.py +0 -0
  92. lfx/utils/constants.py +2 -0
  93. lfx/utils/mustache_security.py +79 -0
  94. lfx/utils/validate_cloud.py +81 -3
  95. {lfx_nightly-0.2.0.dev41.dist-info → lfx_nightly-0.3.0.dev3.dist-info}/METADATA +7 -2
  96. {lfx_nightly-0.2.0.dev41.dist-info → lfx_nightly-0.3.0.dev3.dist-info}/RECORD +98 -80
  97. {lfx_nightly-0.2.0.dev41.dist-info → lfx_nightly-0.3.0.dev3.dist-info}/WHEEL +0 -0
  98. {lfx_nightly-0.2.0.dev41.dist-info → lfx_nightly-0.3.0.dev3.dist-info}/entry_points.txt +0 -0
@@ -1,6 +1,7 @@
1
1
  import json
2
2
  from collections.abc import AsyncIterator, Iterator
3
3
  from pathlib import Path
4
+ from typing import Any
4
5
 
5
6
  import orjson
6
7
  import pandas as pd
@@ -13,6 +14,15 @@ from lfx.io import BoolInput, DropdownInput, HandleInput, SecretStrInput, StrInp
13
14
  from lfx.schema import Data, DataFrame, Message
14
15
  from lfx.services.deps import get_settings_service, get_storage_service, session_scope
15
16
  from lfx.template.field.base import Output
17
+ from lfx.utils.validate_cloud import is_astra_cloud_environment
18
+
19
+
20
def _get_storage_location_options():
    """Return the storage-location choices offered by the dropdown.

    AWS and Google Drive are always offered. "Local" is placed first in the
    list unless ``is_astra_cloud_environment()`` reports an Astra cloud
    environment, in which case it is left out.
    """
    choices = [
        {"name": "AWS", "icon": "Amazon"},
        {"name": "Google Drive", "icon": "google"},
    ]
    if not is_astra_cloud_environment():
        # Local disk is only offered outside the Astra cloud environment.
        choices.insert(0, {"name": "Local", "icon": "hard-drive"})
    return choices
16
26
 
17
27
 
18
28
  class SaveToFileComponent(Component):
@@ -49,11 +59,7 @@ class SaveToFileComponent(Component):
49
59
  display_name="Storage Location",
50
60
  placeholder="Select Location",
51
61
  info="Choose where to save the file.",
52
- options=[
53
- {"name": "Local", "icon": "hard-drive"},
54
- {"name": "AWS", "icon": "Amazon"},
55
- {"name": "Google Drive", "icon": "google"},
56
- ],
62
+ options=_get_storage_location_options(),
57
63
  real_time_refresh=True,
58
64
  limit=1,
59
65
  ),
@@ -116,6 +122,7 @@ class SaveToFileComponent(Component):
116
122
  info="AWS Access key ID.",
117
123
  show=False,
118
124
  advanced=True,
125
+ required=True,
119
126
  ),
120
127
  SecretStrInput(
121
128
  name="aws_secret_access_key",
@@ -123,6 +130,7 @@ class SaveToFileComponent(Component):
123
130
  info="AWS Secret Key.",
124
131
  show=False,
125
132
  advanced=True,
133
+ required=True,
126
134
  ),
127
135
  StrInput(
128
136
  name="bucket_name",
@@ -130,6 +138,7 @@ class SaveToFileComponent(Component):
130
138
  info="Enter the name of the S3 bucket.",
131
139
  show=False,
132
140
  advanced=True,
141
+ required=True,
133
142
  ),
134
143
  StrInput(
135
144
  name="aws_region",
@@ -152,6 +161,7 @@ class SaveToFileComponent(Component):
152
161
  info="Your Google Cloud Platform service account JSON key as a secret string (complete JSON content).",
153
162
  show=False,
154
163
  advanced=True,
164
+ required=True,
155
165
  ),
156
166
  StrInput(
157
167
  name="folder_id",
@@ -170,6 +180,12 @@ class SaveToFileComponent(Component):
170
180
 
171
181
  def update_build_config(self, build_config, field_value, field_name=None):
172
182
  """Update build configuration to show/hide fields based on storage location selection."""
183
+ # Update options dynamically based on cloud environment
184
+ # This ensures options are refreshed when build_config is updated
185
+ if "storage_location" in build_config:
186
+ updated_options = _get_storage_location_options()
187
+ build_config["storage_location"]["options"] = updated_options
188
+
173
189
  if field_name != "storage_location":
174
190
  return build_config
175
191
 
@@ -224,12 +240,14 @@ class SaveToFileComponent(Component):
224
240
  for f_name in aws_fields:
225
241
  if f_name in build_config:
226
242
  build_config[f_name]["show"] = True
243
+ build_config[f_name]["advanced"] = False
227
244
 
228
245
  elif location == "Google Drive":
229
246
  gdrive_fields = ["gdrive_format", "service_account_key", "folder_id"]
230
247
  for f_name in gdrive_fields:
231
248
  if f_name in build_config:
232
249
  build_config[f_name]["show"] = True
250
+ build_config[f_name]["advanced"] = False
233
251
 
234
252
  return build_config
235
253
 
@@ -249,6 +267,11 @@ class SaveToFileComponent(Component):
249
267
  msg = "Storage location must be selected."
250
268
  raise ValueError(msg)
251
269
 
270
+ # Check if Local storage is disabled in cloud environment
271
+ if storage_location == "Local" and is_astra_cloud_environment():
272
+ msg = "Local storage is not available in cloud environment. Please use AWS or Google Drive."
273
+ raise ValueError(msg)
274
+
252
275
  # Route to appropriate save method based on storage location
253
276
  if storage_location == "Local":
254
277
  return await self._save_to_local()
@@ -540,32 +563,67 @@ class SaveToFileComponent(Component):
540
563
 
541
564
  async def _save_to_aws(self) -> Message:
542
565
  """Save file to AWS S3 using S3 functionality."""
566
+ import os
567
+
568
+ import boto3
569
+
570
+ from lfx.base.data.cloud_storage_utils import create_s3_client, validate_aws_credentials
571
+
572
+ # Get AWS credentials from component inputs or fall back to environment variables
573
+ aws_access_key_id = getattr(self, "aws_access_key_id", None)
574
+ if aws_access_key_id and hasattr(aws_access_key_id, "get_secret_value"):
575
+ aws_access_key_id = aws_access_key_id.get_secret_value()
576
+ if not aws_access_key_id:
577
+ aws_access_key_id = os.getenv("AWS_ACCESS_KEY_ID")
578
+
579
+ aws_secret_access_key = getattr(self, "aws_secret_access_key", None)
580
+ if aws_secret_access_key and hasattr(aws_secret_access_key, "get_secret_value"):
581
+ aws_secret_access_key = aws_secret_access_key.get_secret_value()
582
+ if not aws_secret_access_key:
583
+ aws_secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")
584
+
585
+ bucket_name = getattr(self, "bucket_name", None)
586
+ if not bucket_name:
587
+ # Try to get from storage service settings
588
+ settings = get_settings_service().settings
589
+ bucket_name = settings.object_storage_bucket_name
590
+
543
591
  # Validate AWS credentials
544
- if not getattr(self, "aws_access_key_id", None):
545
- msg = "AWS Access Key ID is required for S3 storage"
592
+ if not aws_access_key_id:
593
+ msg = (
594
+ "AWS Access Key ID is required for S3 storage. Provide it as a component input "
595
+ "or set AWS_ACCESS_KEY_ID environment variable."
596
+ )
546
597
  raise ValueError(msg)
547
- if not getattr(self, "aws_secret_access_key", None):
548
- msg = "AWS Secret Key is required for S3 storage"
598
+ if not aws_secret_access_key:
599
+ msg = (
600
+ "AWS Secret Key is required for S3 storage. Provide it as a component input "
601
+ "or set AWS_SECRET_ACCESS_KEY environment variable."
602
+ )
549
603
  raise ValueError(msg)
550
- if not getattr(self, "bucket_name", None):
551
- msg = "S3 Bucket Name is required for S3 storage"
604
+ if not bucket_name:
605
+ msg = (
606
+ "S3 Bucket Name is required for S3 storage. Provide it as a component input "
607
+ "or set LANGFLOW_OBJECT_STORAGE_BUCKET_NAME environment variable."
608
+ )
552
609
  raise ValueError(msg)
553
610
 
554
- # Use S3 upload functionality
555
- try:
556
- import boto3
557
- except ImportError as e:
558
- msg = "boto3 is not installed. Please install it using `uv pip install boto3`."
559
- raise ImportError(msg) from e
611
+ # Validate AWS credentials
612
+ validate_aws_credentials(self)
560
613
 
561
614
  # Create S3 client
562
- client_config = {
563
- "aws_access_key_id": self.aws_access_key_id,
564
- "aws_secret_access_key": self.aws_secret_access_key,
615
+ s3_client = create_s3_client(self)
616
+ client_config: dict[str, Any] = {
617
+ "aws_access_key_id": str(aws_access_key_id),
618
+ "aws_secret_access_key": str(aws_secret_access_key),
565
619
  }
566
620
 
567
- if hasattr(self, "aws_region") and self.aws_region:
568
- client_config["region_name"] = self.aws_region
621
+ # Get region from component input, environment variable, or settings
622
+ aws_region = getattr(self, "aws_region", None)
623
+ if not aws_region:
624
+ aws_region = os.getenv("AWS_DEFAULT_REGION") or os.getenv("AWS_REGION")
625
+ if aws_region:
626
+ client_config["region_name"] = str(aws_region)
569
627
 
570
628
  s3_client = boto3.client("s3", **client_config)
571
629
 
@@ -589,8 +647,8 @@ class SaveToFileComponent(Component):
589
647
 
590
648
  try:
591
649
  # Upload to S3
592
- s3_client.upload_file(temp_file_path, self.bucket_name, file_path)
593
- s3_url = f"s3://{self.bucket_name}/{file_path}"
650
+ s3_client.upload_file(temp_file_path, bucket_name, file_path)
651
+ s3_url = f"s3://{bucket_name}/{file_path}"
594
652
  return Message(text=f"File successfully uploaded to {s3_url}")
595
653
  finally:
596
654
  # Clean up temp file
@@ -599,6 +657,12 @@ class SaveToFileComponent(Component):
599
657
 
600
658
  async def _save_to_google_drive(self) -> Message:
601
659
  """Save file to Google Drive using Google Drive functionality."""
660
+ import tempfile
661
+
662
+ from googleapiclient.http import MediaFileUpload
663
+
664
+ from lfx.base.data.cloud_storage_utils import create_google_drive_service
665
+
602
666
  # Validate Google Drive credentials
603
667
  if not getattr(self, "service_account_key", None):
604
668
  msg = "GCP Credentials Secret Key is required for Google Drive storage"
@@ -607,71 +671,10 @@ class SaveToFileComponent(Component):
607
671
  msg = "Google Drive Folder ID is required for Google Drive storage"
608
672
  raise ValueError(msg)
609
673
 
610
- # Use Google Drive upload functionality
611
- try:
612
- import json
613
- import tempfile
614
-
615
- from google.oauth2 import service_account
616
- from googleapiclient.discovery import build
617
- from googleapiclient.http import MediaFileUpload
618
- except ImportError as e:
619
- msg = "Google API client libraries are not installed. Please install them."
620
- raise ImportError(msg) from e
621
-
622
- # Parse credentials with multiple fallback strategies
623
- credentials_dict = None
624
- parse_errors = []
625
-
626
- # Strategy 1: Parse as-is with strict=False to allow control characters
627
- try:
628
- credentials_dict = json.loads(self.service_account_key, strict=False)
629
- except json.JSONDecodeError as e:
630
- parse_errors.append(f"Standard parse: {e!s}")
631
-
632
- # Strategy 2: Strip whitespace and try again
633
- if credentials_dict is None:
634
- try:
635
- cleaned_key = self.service_account_key.strip()
636
- credentials_dict = json.loads(cleaned_key, strict=False)
637
- except json.JSONDecodeError as e:
638
- parse_errors.append(f"Stripped parse: {e!s}")
639
-
640
- # Strategy 3: Check if it's double-encoded (JSON string of a JSON string)
641
- if credentials_dict is None:
642
- try:
643
- decoded_once = json.loads(self.service_account_key, strict=False)
644
- if isinstance(decoded_once, str):
645
- credentials_dict = json.loads(decoded_once, strict=False)
646
- else:
647
- credentials_dict = decoded_once
648
- except json.JSONDecodeError as e:
649
- parse_errors.append(f"Double-encoded parse: {e!s}")
650
-
651
- # Strategy 4: Try to fix common issues with newlines in the private_key field
652
- if credentials_dict is None:
653
- try:
654
- # Replace literal \n with actual newlines which is common in pasted JSON
655
- fixed_key = self.service_account_key.replace("\\n", "\n")
656
- credentials_dict = json.loads(fixed_key, strict=False)
657
- except json.JSONDecodeError as e:
658
- parse_errors.append(f"Newline-fixed parse: {e!s}")
659
-
660
- if credentials_dict is None:
661
- error_details = "; ".join(parse_errors)
662
- msg = (
663
- f"Unable to parse service account key JSON. Tried multiple strategies: {error_details}. "
664
- "Please ensure you've copied the entire JSON content from your service account key file. "
665
- "The JSON should start with '{' and contain fields like 'type', 'project_id', 'private_key', etc."
666
- )
667
- raise ValueError(msg)
668
-
669
- # Create Google Drive service with appropriate scopes
670
- # Use drive scope for folder access, file scope is too restrictive for folder verification
671
- credentials = service_account.Credentials.from_service_account_info(
672
- credentials_dict, scopes=["https://www.googleapis.com/auth/drive"]
674
+ # Create Google Drive service with full drive scope (needed for folder operations)
675
+ drive_service, credentials = create_google_drive_service(
676
+ self.service_account_key, scopes=["https://www.googleapis.com/auth/drive"], return_credentials=True
673
677
  )
674
- drive_service = build("drive", "v3", credentials=credentials)
675
678
 
676
679
  # Extract content and format
677
680
  content = self._extract_content_for_upload()
@@ -0,0 +1,211 @@
1
+ """IBM WatsonX-specific tool calling logic.
2
+
3
+ This module contains all the specialized handling for IBM WatsonX models
4
+ which have different tool calling behavior compared to other LLMs.
5
+
6
+ The tool calling issues affect ALL models on the WatsonX platform,
7
+ not just Granite models. This includes:
8
+ - meta-llama models
9
+ - mistral models
10
+ - granite models
11
+ - any other model running through WatsonX
12
+ """
13
+
14
+ import re
15
+
16
+ from langchain.agents.format_scratchpad.tools import format_to_tool_messages
17
+ from langchain.agents.output_parsers.tools import ToolsAgentOutputParser
18
+ from langchain_core.prompts import ChatPromptTemplate
19
+ from langchain_core.runnables import RunnableLambda
20
+
21
+ from lfx.log.logger import logger
22
+
23
+ # Pattern to detect placeholder usage in tool arguments
24
+ PLACEHOLDER_PATTERN = re.compile(
25
+ r"<[^>]*(?:result|value|output|response|data|from|extract|previous|current|date|input|query|search|tool)[^>]*>",
26
+ re.IGNORECASE,
27
+ )
28
+
29
+
30
def is_watsonx_model(llm) -> bool:
    """Report whether ``llm`` is served through IBM WatsonX.

    Detection targets the provider rather than a model family, because the
    tool calling issues affect every model on the WatsonX platform (not just
    Granite).

    The class name is inspected first (e.g. ``ChatWatsonx``); if that does not
    match, the defining module path is checked for ``watsonx`` or
    ``langchain_ibm``.
    """
    llm_type = type(llm)

    # Primary signal: the class name itself mentions WatsonX.
    if "watsonx" in llm_type.__name__.lower():
        return True

    # Secondary signal: the class lives in a WatsonX / langchain_ibm module.
    module_path = getattr(llm_type, "__module__", "").lower()
    return any(marker in module_path for marker in ("watsonx", "langchain_ibm"))
44
+
45
+
46
def is_granite_model(llm) -> bool:
    """Check if the LLM is an IBM Granite model.

    DEPRECATED: Use is_watsonx_model() instead.
    Kept for backwards compatibility.

    The model identifier is read from ``llm.model_id`` and, when that is
    missing *or empty/None*, from ``llm.model_name``. The check is a
    case-insensitive substring match on "granite".
    """
    # Fall back to model_name when model_id is absent OR falsy: an attribute
    # that exists but is None/"" must not hide a Granite model_name.
    model_id = getattr(llm, "model_id", None) or getattr(llm, "model_name", None) or ""
    return "granite" in str(model_id).lower()
54
+
55
+
56
+ def _get_tool_schema_description(tool) -> str:
57
+ """Extract a brief description of the tool's expected parameters.
58
+
59
+ Returns empty string if schema extraction fails (graceful degradation).
60
+ """
61
+ if not hasattr(tool, "args_schema") or not tool.args_schema:
62
+ return ""
63
+
64
+ schema = tool.args_schema
65
+ if not hasattr(schema, "model_fields"):
66
+ return ""
67
+
68
+ try:
69
+ fields = schema.model_fields
70
+ params = []
71
+ for name, field in fields.items():
72
+ required = field.is_required() if hasattr(field, "is_required") else True
73
+ req_str = "(required)" if required else "(optional)"
74
+ params.append(f"{name} {req_str}")
75
+ return f"Parameters: {', '.join(params)}" if params else ""
76
+ except (AttributeError, TypeError) as e:
77
+ logger.debug(f"Could not extract schema for tool {getattr(tool, 'name', 'unknown')}: {e}")
78
+ return ""
79
+
80
+
81
def get_enhanced_system_prompt(base_prompt: str, tools: list) -> str:
    """Append tool usage guidelines and a tool listing to ``base_prompt``.

    The prompt is returned unchanged when fewer than two tools are supplied.
    Otherwise each tool is listed by name, followed by its parameter summary
    when one can be extracted from its schema.
    """
    if not tools or len(tools) < 2:
        return base_prompt

    # One bullet per tool; the parameter summary is appended only when available.
    bullet_lines = []
    for tool in tools:
        params_summary = _get_tool_schema_description(tool)
        bullet_lines.append(f"- {tool.name}: {params_summary}" if params_summary else f"- {tool.name}")
    tools_section = "\n".join(bullet_lines)

    # Note: "one tool at a time" is a WatsonX platform limitation, not a design choice.
    # WatsonX models don't reliably support parallel tool calls.
    guidelines = f"""

TOOL USAGE GUIDELINES:

1. ALWAYS call tools when you need information - never say "I cannot" or "I don't have access".
2. Call one tool at a time, then use its result before calling another tool.
3. Use ACTUAL values in tool arguments - never use placeholder syntax like <result-from-...>.
4. Each tool has specific parameters - use the correct ones for each tool.

AVAILABLE TOOLS:
{tools_section}"""

    return base_prompt + guidelines
112
+
113
+
114
+ def detect_placeholder_in_args(tool_calls: list) -> tuple[bool, str | None]:
115
+ """Detect if any tool call contains placeholder syntax in its arguments."""
116
+ if not tool_calls:
117
+ return False, None
118
+
119
+ for tool_call in tool_calls:
120
+ args = tool_call.get("args", {})
121
+ if isinstance(args, dict):
122
+ for key, value in args.items():
123
+ if isinstance(value, str) and PLACEHOLDER_PATTERN.search(value):
124
+ tool_name = tool_call.get("name", "unknown")
125
+ logger.warning(f"[IBM WatsonX] Detected placeholder: {tool_name}.{key}={value}")
126
+ return True, value
127
+ elif isinstance(args, str) and PLACEHOLDER_PATTERN.search(args):
128
+ logger.warning(f"[IBM WatsonX] Detected placeholder in args: {args}")
129
+ return True, args
130
+ return False, None
131
+
132
+
133
+ def _limit_to_single_tool_call(llm_response):
134
+ """Limit response to single tool call (WatsonX platform limitation)."""
135
+ if not hasattr(llm_response, "tool_calls") or not llm_response.tool_calls:
136
+ return llm_response
137
+
138
+ if len(llm_response.tool_calls) > 1:
139
+ logger.debug(f"[WatsonX] Limiting {len(llm_response.tool_calls)} tool calls to 1")
140
+ llm_response.tool_calls = [llm_response.tool_calls[0]]
141
+
142
+ return llm_response
143
+
144
+
145
+ def _handle_placeholder_in_response(llm_response, messages, llm_auto):
146
+ """Re-invoke with corrective message if placeholder syntax detected."""
147
+ if not hasattr(llm_response, "tool_calls") or not llm_response.tool_calls:
148
+ return llm_response
149
+
150
+ has_placeholder, _ = detect_placeholder_in_args(llm_response.tool_calls)
151
+ if not has_placeholder:
152
+ return llm_response
153
+
154
+ logger.warning("[WatsonX] Placeholder detected, requesting actual values")
155
+ from langchain_core.messages import SystemMessage
156
+
157
+ corrective_msg = SystemMessage(
158
+ content="Provide your final answer using the actual values from previous tool results."
159
+ )
160
+ messages_list = list(messages.messages) if hasattr(messages, "messages") else list(messages)
161
+ messages_list.append(corrective_msg)
162
+ return llm_auto.invoke(messages_list)
163
+
164
+
165
def create_granite_agent(llm, tools: list, prompt: ChatPromptTemplate, forced_iterations: int = 2):
    """Create a tool calling agent for IBM WatsonX/Granite models.

    Why this exists: WatsonX models have platform-specific tool calling behavior:
    - With tool_choice='auto': Models often describe tools in text instead of calling them
    - With tool_choice='required': Models can't provide final answers (causes infinite loops)
    - Models only reliably support single tool calls per turn

    Solution: Dynamic switching between 'required' (to force tool use) and 'auto' (to allow answers).

    Args:
        llm: WatsonX language model instance
        tools: Available tools for the agent
        prompt: Chat prompt template
        forced_iterations: Iterations to force tool_choice='required' before allowing 'auto'

    Returns:
        Runnable agent chain compatible with AgentExecutor

    Raises:
        ValueError: If ``llm`` has no ``bind_tools`` attribute.
    """
    if not hasattr(llm, "bind_tools"):
        msg = "WatsonX handler requires a language model with bind_tools support."
        raise ValueError(msg)

    llm_required = llm.bind_tools(tools or [], tool_choice="required")
    llm_auto = llm.bind_tools(tools or [], tool_choice="auto")

    def invoke(inputs: dict):
        intermediate_steps = inputs.get("intermediate_steps", [])
        num_steps = len(intermediate_steps)

        scratchpad = format_to_tool_messages(intermediate_steps)
        messages = prompt.invoke({**inputs, "agent_scratchpad": scratchpad})

        # Use 'required' for first N iterations, then 'auto' to allow final answers
        use_required = num_steps < forced_iterations
        llm_to_use = llm_required if use_required else llm_auto
        logger.debug(f"[WatsonX] Step {num_steps + 1}, tool_choice={'required' if use_required else 'auto'}")

        response = llm_to_use.invoke(messages)
        response = _limit_to_single_tool_call(response)
        response = _handle_placeholder_in_response(response, messages, llm_auto)
        # A corrective re-invocation yields a fresh response that has not been
        # trimmed yet, so enforce the single-tool-call limit again (this is a
        # no-op when the response was returned unchanged).
        return _limit_to_single_tool_call(response)

    return RunnableLambda(invoke) | ToolsAgentOutputParser()
208
+
209
+
210
+ # Alias for backwards compatibility
211
+ create_watsonx_agent = create_granite_agent
@@ -2,6 +2,13 @@ from langchain.agents import create_tool_calling_agent
2
2
  from langchain_core.prompts import ChatPromptTemplate
3
3
 
4
4
  from lfx.base.agents.agent import LCToolsAgentComponent
5
+
6
+ # IBM Granite-specific logic is in a separate file
7
+ from lfx.components.langchain_utilities.ibm_granite_handler import (
8
+ create_granite_agent,
9
+ get_enhanced_system_prompt,
10
+ is_granite_model,
11
+ )
5
12
  from lfx.inputs.inputs import (
6
13
  DataInput,
7
14
  HandleInput,
@@ -44,15 +51,39 @@ class ToolCallingAgentComponent(LCToolsAgentComponent):
44
51
  return self.chat_history
45
52
 
46
53
  def create_agent_runnable(self):
47
- messages = [
48
- ("system", "{system_prompt}"),
49
- ("placeholder", "{chat_history}"),
50
- ("human", "{input}"),
51
- ("placeholder", "{agent_scratchpad}"),
52
- ]
54
+ messages = []
55
+
56
+ # Use local variable to avoid mutating component state on repeated calls
57
+ effective_system_prompt = self.system_prompt or ""
58
+
59
+ # Enhance prompt for IBM Granite models (they need explicit tool usage instructions)
60
+ if is_granite_model(self.llm) and self.tools:
61
+ effective_system_prompt = get_enhanced_system_prompt(effective_system_prompt, self.tools)
62
+ # Store enhanced prompt for use in agent.py without mutating original
63
+ self._effective_system_prompt = effective_system_prompt
64
+
65
+ # Only include system message if system_prompt is provided and not empty
66
+ if effective_system_prompt.strip():
67
+ messages.append(("system", "{system_prompt}"))
68
+
69
+ messages.extend(
70
+ [
71
+ ("placeholder", "{chat_history}"),
72
+ ("human", "{input}"),
73
+ ("placeholder", "{agent_scratchpad}"),
74
+ ]
75
+ )
76
+
53
77
  prompt = ChatPromptTemplate.from_messages(messages)
54
78
  self.validate_tool_names()
79
+
55
80
  try:
81
+ # Use IBM Granite-specific agent if detected
82
+ # Other WatsonX models (Llama, Mistral, etc.) use default behavior
83
+ if is_granite_model(self.llm) and self.tools:
84
+ return create_granite_agent(self.llm, self.tools, prompt)
85
+
86
+ # Default behavior for other models (including non-Granite WatsonX models)
56
87
  return create_tool_calling_agent(self.llm, self.tools or [], prompt)
57
88
  except NotImplementedError as e:
58
89
  message = f"{self.display_name} does not support tool calling. Please try using a compatible model."
@@ -4,8 +4,13 @@ from typing import TYPE_CHECKING, Any, cast
4
4
 
5
5
  import toml # type: ignore[import-untyped]
6
6
 
7
+ from lfx.base.models.unified_models import (
8
+ get_language_model_options,
9
+ get_model_classes,
10
+ update_model_options_in_build_config,
11
+ )
7
12
  from lfx.custom.custom_component.component import Component
8
- from lfx.io import BoolInput, DataFrameInput, HandleInput, MessageTextInput, MultilineInput, Output
13
+ from lfx.io import BoolInput, DataFrameInput, MessageTextInput, ModelInput, MultilineInput, Output, SecretStrInput
9
14
  from lfx.log.logger import logger
10
15
  from lfx.schema.dataframe import DataFrame
11
16
 
@@ -20,13 +25,20 @@ class BatchRunComponent(Component):
20
25
  icon = "List"
21
26
 
22
27
  inputs = [
23
- HandleInput(
28
+ ModelInput(
24
29
  name="model",
25
30
  display_name="Language Model",
26
- info="Connect the 'Language Model' output from your LLM component here.",
27
- input_types=["LanguageModel"],
31
+ info="Select your model provider",
32
+ real_time_refresh=True,
28
33
  required=True,
29
34
  ),
35
+ SecretStrInput(
36
+ name="api_key",
37
+ display_name="API Key",
38
+ info="Model Provider API key",
39
+ real_time_refresh=True,
40
+ advanced=True,
41
+ ),
30
42
  MultilineInput(
31
43
  name="system_message",
32
44
  display_name="Instructions",
@@ -76,6 +88,17 @@ class BatchRunComponent(Component):
76
88
  ),
77
89
  ]
78
90
 
91
+ def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):
92
+ """Dynamically update build config with user-filtered model options."""
93
+ return update_model_options_in_build_config(
94
+ component=self,
95
+ build_config=build_config,
96
+ cache_key_prefix="language_model_options",
97
+ get_options_func=get_language_model_options,
98
+ field_name=field_name,
99
+ field_value=field_value,
100
+ )
101
+
79
102
  def _format_row_as_toml(self, row: dict[str, Any]) -> str:
80
103
  """Convert a dictionary (row) into a TOML-formatted string."""
81
104
  formatted_dict = {str(col): {"value": str(val)} for col, val in row.items()}
@@ -111,20 +134,43 @@ class BatchRunComponent(Component):
111
134
  }
112
135
 
113
136
  async def run_batch(self) -> DataFrame:
114
- """Process each row in df[column_name] with the language model asynchronously.
115
-
116
- Returns:
117
- DataFrame: A new DataFrame containing:
118
- - All original columns
119
- - The model's response column (customizable name)
120
- - 'batch_index' column for processing order
121
- - 'metadata' (optional)
122
-
123
- Raises:
124
- ValueError: If the specified column is not found in the DataFrame
125
- TypeError: If the model is not compatible or input types are wrong
126
- """
127
- model: Runnable = self.model
137
+ """Process each row in df[column_name] with the language model asynchronously."""
138
+ # Check if model is already an instance (for testing) or needs to be instantiated
139
+ if isinstance(self.model, list):
140
+ # Extract model configuration
141
+ model_selection = self.model[0]
142
+ model_name = model_selection.get("name")
143
+ provider = model_selection.get("provider")
144
+ metadata = model_selection.get("metadata", {})
145
+
146
+ # Get model class and parameters from metadata
147
+ model_class = get_model_classes().get(metadata.get("model_class"))
148
+ if model_class is None:
149
+ msg = f"No model class defined for {model_name}"
150
+ raise ValueError(msg)
151
+
152
+ api_key_param = metadata.get("api_key_param", "api_key")
153
+ model_name_param = metadata.get("model_name_param", "model")
154
+
155
+ # Get API key from global variables
156
+ from lfx.base.models.unified_models import get_api_key_for_provider
157
+
158
+ api_key = get_api_key_for_provider(self.user_id, provider, self.api_key)
159
+
160
+ if not api_key and provider != "Ollama":
161
+ msg = f"{provider} API key is required. Please configure it globally."
162
+ raise ValueError(msg)
163
+
164
+ # Instantiate the model
165
+ kwargs = {
166
+ model_name_param: model_name,
167
+ api_key_param: api_key,
168
+ }
169
+ model: Runnable = model_class(**kwargs)
170
+ else:
171
+ # Model is already an instance (typically in tests)
172
+ model = self.model
173
+
128
174
  system_msg = self.system_message or ""
129
175
  df: DataFrame = self.df
130
176
  col_name = self.column_name or ""