lfx-nightly 0.2.0.dev26__py3-none-any.whl → 0.2.1.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. lfx/_assets/component_index.json +1 -1
  2. lfx/base/agents/agent.py +9 -4
  3. lfx/base/agents/altk_base_agent.py +16 -3
  4. lfx/base/agents/altk_tool_wrappers.py +1 -1
  5. lfx/base/agents/utils.py +4 -0
  6. lfx/base/composio/composio_base.py +78 -41
  7. lfx/base/data/base_file.py +14 -4
  8. lfx/base/data/cloud_storage_utils.py +156 -0
  9. lfx/base/data/docling_utils.py +191 -65
  10. lfx/base/data/storage_utils.py +109 -0
  11. lfx/base/datastax/astradb_base.py +75 -64
  12. lfx/base/mcp/util.py +2 -2
  13. lfx/base/models/__init__.py +11 -1
  14. lfx/base/models/anthropic_constants.py +21 -12
  15. lfx/base/models/google_generative_ai_constants.py +33 -9
  16. lfx/base/models/model_metadata.py +6 -0
  17. lfx/base/models/ollama_constants.py +196 -30
  18. lfx/base/models/openai_constants.py +37 -10
  19. lfx/base/models/unified_models.py +1123 -0
  20. lfx/base/models/watsonx_constants.py +36 -0
  21. lfx/base/tools/component_tool.py +2 -9
  22. lfx/cli/commands.py +6 -1
  23. lfx/cli/run.py +65 -409
  24. lfx/cli/script_loader.py +13 -3
  25. lfx/components/__init__.py +0 -3
  26. lfx/components/composio/github_composio.py +1 -1
  27. lfx/components/cuga/cuga_agent.py +39 -27
  28. lfx/components/data_source/api_request.py +4 -2
  29. lfx/components/docling/__init__.py +45 -11
  30. lfx/components/docling/chunk_docling_document.py +3 -1
  31. lfx/components/docling/docling_inline.py +39 -49
  32. lfx/components/docling/export_docling_document.py +3 -1
  33. lfx/components/elastic/opensearch_multimodal.py +215 -57
  34. lfx/components/files_and_knowledge/file.py +439 -39
  35. lfx/components/files_and_knowledge/ingestion.py +8 -0
  36. lfx/components/files_and_knowledge/retrieval.py +10 -0
  37. lfx/components/files_and_knowledge/save_file.py +123 -53
  38. lfx/components/ibm/watsonx.py +7 -1
  39. lfx/components/input_output/chat_output.py +7 -1
  40. lfx/components/langchain_utilities/tool_calling.py +14 -6
  41. lfx/components/llm_operations/batch_run.py +80 -25
  42. lfx/components/llm_operations/lambda_filter.py +33 -6
  43. lfx/components/llm_operations/llm_conditional_router.py +39 -7
  44. lfx/components/llm_operations/structured_output.py +38 -12
  45. lfx/components/models/__init__.py +16 -74
  46. lfx/components/models_and_agents/agent.py +51 -201
  47. lfx/components/models_and_agents/embedding_model.py +185 -339
  48. lfx/components/models_and_agents/language_model.py +54 -318
  49. lfx/components/models_and_agents/mcp_component.py +58 -9
  50. lfx/components/ollama/ollama.py +9 -4
  51. lfx/components/ollama/ollama_embeddings.py +2 -1
  52. lfx/components/openai/openai_chat_model.py +1 -1
  53. lfx/components/processing/__init__.py +0 -3
  54. lfx/components/vllm/__init__.py +37 -0
  55. lfx/components/vllm/vllm.py +141 -0
  56. lfx/components/vllm/vllm_embeddings.py +110 -0
  57. lfx/custom/custom_component/custom_component.py +8 -6
  58. lfx/custom/directory_reader/directory_reader.py +5 -2
  59. lfx/graph/utils.py +64 -18
  60. lfx/inputs/__init__.py +2 -0
  61. lfx/inputs/input_mixin.py +54 -0
  62. lfx/inputs/inputs.py +115 -0
  63. lfx/interface/initialize/loading.py +42 -12
  64. lfx/io/__init__.py +2 -0
  65. lfx/run/__init__.py +5 -0
  66. lfx/run/base.py +494 -0
  67. lfx/schema/data.py +1 -1
  68. lfx/schema/image.py +28 -19
  69. lfx/schema/message.py +19 -3
  70. lfx/services/interfaces.py +5 -0
  71. lfx/services/manager.py +5 -4
  72. lfx/services/mcp_composer/service.py +45 -13
  73. lfx/services/settings/auth.py +18 -11
  74. lfx/services/settings/base.py +12 -24
  75. lfx/services/settings/constants.py +2 -0
  76. lfx/services/storage/local.py +37 -0
  77. lfx/services/storage/service.py +19 -0
  78. lfx/utils/constants.py +1 -0
  79. lfx/utils/image.py +29 -11
  80. lfx/utils/validate_cloud.py +14 -3
  81. {lfx_nightly-0.2.0.dev26.dist-info → lfx_nightly-0.2.1.dev7.dist-info}/METADATA +5 -2
  82. {lfx_nightly-0.2.0.dev26.dist-info → lfx_nightly-0.2.1.dev7.dist-info}/RECORD +84 -78
  83. lfx/components/processing/dataframe_to_toolset.py +0 -259
  84. {lfx_nightly-0.2.0.dev26.dist-info → lfx_nightly-0.2.1.dev7.dist-info}/WHEEL +0 -0
  85. {lfx_nightly-0.2.0.dev26.dist-info → lfx_nightly-0.2.1.dev7.dist-info}/entry_points.txt +0 -0
@@ -15,9 +15,13 @@ from lfx.log.logger import logger
15
15
  from lfx.schema.data import Data
16
16
  from lfx.schema.dataframe import DataFrame
17
17
  from lfx.services.deps import get_settings_service, session_scope
18
+ from lfx.utils.validate_cloud import raise_error_if_astra_cloud_disable_component
18
19
 
19
20
  _KNOWLEDGE_BASES_ROOT_PATH: Path | None = None
20
21
 
22
+ # Error message to raise if we're in Astra cloud environment and the component is not supported.
23
+ astra_error_msg = "Knowledge retrieval is not supported in Astra cloud environment."
24
+
21
25
 
22
26
  def _get_knowledge_bases_root_path() -> Path:
23
27
  """Lazy load the knowledge bases root path from settings."""
@@ -95,6 +99,8 @@ class KnowledgeRetrievalComponent(Component):
95
99
  ]
96
100
 
97
101
  async def update_build_config(self, build_config, field_value, field_name=None): # noqa: ARG002
102
+ # Check if we're in Astra cloud environment and raise an error if we are.
103
+ raise_error_if_astra_cloud_disable_component(astra_error_msg)
98
104
  if field_name == "knowledge_base":
99
105
  # Update the knowledge base options dynamically
100
106
  build_config["knowledge_base"]["options"] = await get_knowledge_bases(
@@ -110,6 +116,8 @@ class KnowledgeRetrievalComponent(Component):
110
116
 
111
117
  def _get_kb_metadata(self, kb_path: Path) -> dict:
112
118
  """Load and process knowledge base metadata."""
119
+ # Check if we're in Astra cloud environment and raise an error if we are.
120
+ raise_error_if_astra_cloud_disable_component(astra_error_msg)
113
121
  metadata: dict[str, Any] = {}
114
122
  metadata_file = kb_path / "embedding_metadata.json"
115
123
  if not metadata_file.exists():
@@ -184,6 +192,8 @@ class KnowledgeRetrievalComponent(Component):
184
192
  Returns:
185
193
  A DataFrame containing the data rows from the knowledge base.
186
194
  """
195
+ # Check if we're in Astra cloud environment and raise an error if we are.
196
+ raise_error_if_astra_cloud_disable_component(astra_error_msg)
187
197
  # Get the current user
188
198
  async with session_scope() as db:
189
199
  if not self.user_id:
@@ -1,6 +1,7 @@
1
1
  import json
2
2
  from collections.abc import AsyncIterator, Iterator
3
3
  from pathlib import Path
4
+ from typing import Any
4
5
 
5
6
  import orjson
6
7
  import pandas as pd
@@ -13,6 +14,15 @@ from lfx.io import BoolInput, DropdownInput, HandleInput, SecretStrInput, StrInp
13
14
  from lfx.schema import Data, DataFrame, Message
14
15
  from lfx.services.deps import get_settings_service, get_storage_service, session_scope
15
16
  from lfx.template.field.base import Output
17
+ from lfx.utils.validate_cloud import is_astra_cloud_environment
18
+
19
+
20
+ def _get_storage_location_options():
21
+ """Get storage location options, filtering out Local if in Astra cloud environment."""
22
+ all_options = [{"name": "AWS", "icon": "Amazon"}, {"name": "Google Drive", "icon": "google"}]
23
+ if is_astra_cloud_environment():
24
+ return all_options
25
+ return [{"name": "Local", "icon": "hard-drive"}, *all_options]
16
26
 
17
27
 
18
28
  class SaveToFileComponent(Component):
@@ -49,11 +59,7 @@ class SaveToFileComponent(Component):
49
59
  display_name="Storage Location",
50
60
  placeholder="Select Location",
51
61
  info="Choose where to save the file.",
52
- options=[
53
- {"name": "Local", "icon": "hard-drive"},
54
- {"name": "AWS", "icon": "Amazon"},
55
- {"name": "Google Drive", "icon": "google"},
56
- ],
62
+ options=_get_storage_location_options(),
57
63
  real_time_refresh=True,
58
64
  limit=1,
59
65
  ),
@@ -77,7 +83,10 @@ class SaveToFileComponent(Component):
77
83
  BoolInput(
78
84
  name="append_mode",
79
85
  display_name="Append",
80
- info="Append to file if it exists (only for plain text formats). Disabled for binary formats like Excel.",
86
+ info=(
87
+ "Append to file if it exists (only for Local storage with plain text formats). "
88
+ "Not supported for cloud storage (AWS/Google Drive)."
89
+ ),
81
90
  value=False,
82
91
  show=False,
83
92
  ),
@@ -113,6 +122,7 @@ class SaveToFileComponent(Component):
113
122
  info="AWS Access key ID.",
114
123
  show=False,
115
124
  advanced=True,
125
+ required=True,
116
126
  ),
117
127
  SecretStrInput(
118
128
  name="aws_secret_access_key",
@@ -120,6 +130,7 @@ class SaveToFileComponent(Component):
120
130
  info="AWS Secret Key.",
121
131
  show=False,
122
132
  advanced=True,
133
+ required=True,
123
134
  ),
124
135
  StrInput(
125
136
  name="bucket_name",
@@ -127,6 +138,7 @@ class SaveToFileComponent(Component):
127
138
  info="Enter the name of the S3 bucket.",
128
139
  show=False,
129
140
  advanced=True,
141
+ required=True,
130
142
  ),
131
143
  StrInput(
132
144
  name="aws_region",
@@ -149,6 +161,7 @@ class SaveToFileComponent(Component):
149
161
  info="Your Google Cloud Platform service account JSON key as a secret string (complete JSON content).",
150
162
  show=False,
151
163
  advanced=True,
164
+ required=True,
152
165
  ),
153
166
  StrInput(
154
167
  name="folder_id",
@@ -157,6 +170,7 @@ class SaveToFileComponent(Component):
157
170
  "The Google Drive folder ID where the file will be uploaded. "
158
171
  "The folder must be shared with the service account email."
159
172
  ),
173
+ required=True,
160
174
  show=False,
161
175
  advanced=True,
162
176
  ),
@@ -166,6 +180,12 @@ class SaveToFileComponent(Component):
166
180
 
167
181
  def update_build_config(self, build_config, field_value, field_name=None):
168
182
  """Update build configuration to show/hide fields based on storage location selection."""
183
+ # Update options dynamically based on cloud environment
184
+ # This ensures options are refreshed when build_config is updated
185
+ if "storage_location" in build_config:
186
+ updated_options = _get_storage_location_options()
187
+ build_config["storage_location"]["options"] = updated_options
188
+
169
189
  if field_name != "storage_location":
170
190
  return build_config
171
191
 
@@ -196,11 +216,13 @@ class SaveToFileComponent(Component):
196
216
  if len(selected) == 1:
197
217
  location = selected[0]
198
218
 
199
- # Show file_name and append_mode when any storage location is selected
219
+ # Show file_name when any storage location is selected
200
220
  if "file_name" in build_config:
201
221
  build_config["file_name"]["show"] = True
222
+
223
+ # Show append_mode only for Local storage (not supported for cloud storage)
202
224
  if "append_mode" in build_config:
203
- build_config["append_mode"]["show"] = True
225
+ build_config["append_mode"]["show"] = location == "Local"
204
226
 
205
227
  if location == "Local":
206
228
  if "local_format" in build_config:
@@ -218,12 +240,14 @@ class SaveToFileComponent(Component):
218
240
  for f_name in aws_fields:
219
241
  if f_name in build_config:
220
242
  build_config[f_name]["show"] = True
243
+ build_config[f_name]["advanced"] = False
221
244
 
222
245
  elif location == "Google Drive":
223
246
  gdrive_fields = ["gdrive_format", "service_account_key", "folder_id"]
224
247
  for f_name in gdrive_fields:
225
248
  if f_name in build_config:
226
249
  build_config[f_name]["show"] = True
250
+ build_config[f_name]["advanced"] = False
227
251
 
228
252
  return build_config
229
253
 
@@ -243,6 +267,11 @@ class SaveToFileComponent(Component):
243
267
  msg = "Storage location must be selected."
244
268
  raise ValueError(msg)
245
269
 
270
+ # Check if Local storage is disabled in cloud environment
271
+ if storage_location == "Local" and is_astra_cloud_environment():
272
+ msg = "Local storage is not available in cloud environment. Please use AWS or Google Drive."
273
+ raise ValueError(msg)
274
+
246
275
  # Route to appropriate save method based on storage location
247
276
  if storage_location == "Local":
248
277
  return await self._save_to_local()
@@ -534,32 +563,67 @@ class SaveToFileComponent(Component):
534
563
 
535
564
  async def _save_to_aws(self) -> Message:
536
565
  """Save file to AWS S3 using S3 functionality."""
566
+ import os
567
+
568
+ import boto3
569
+
570
+ from lfx.base.data.cloud_storage_utils import create_s3_client, validate_aws_credentials
571
+
572
+ # Get AWS credentials from component inputs or fall back to environment variables
573
+ aws_access_key_id = getattr(self, "aws_access_key_id", None)
574
+ if aws_access_key_id and hasattr(aws_access_key_id, "get_secret_value"):
575
+ aws_access_key_id = aws_access_key_id.get_secret_value()
576
+ if not aws_access_key_id:
577
+ aws_access_key_id = os.getenv("AWS_ACCESS_KEY_ID")
578
+
579
+ aws_secret_access_key = getattr(self, "aws_secret_access_key", None)
580
+ if aws_secret_access_key and hasattr(aws_secret_access_key, "get_secret_value"):
581
+ aws_secret_access_key = aws_secret_access_key.get_secret_value()
582
+ if not aws_secret_access_key:
583
+ aws_secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")
584
+
585
+ bucket_name = getattr(self, "bucket_name", None)
586
+ if not bucket_name:
587
+ # Try to get from storage service settings
588
+ settings = get_settings_service().settings
589
+ bucket_name = settings.object_storage_bucket_name
590
+
537
591
  # Validate AWS credentials
538
- if not getattr(self, "aws_access_key_id", None):
539
- msg = "AWS Access Key ID is required for S3 storage"
592
+ if not aws_access_key_id:
593
+ msg = (
594
+ "AWS Access Key ID is required for S3 storage. Provide it as a component input "
595
+ "or set AWS_ACCESS_KEY_ID environment variable."
596
+ )
540
597
  raise ValueError(msg)
541
- if not getattr(self, "aws_secret_access_key", None):
542
- msg = "AWS Secret Key is required for S3 storage"
598
+ if not aws_secret_access_key:
599
+ msg = (
600
+ "AWS Secret Key is required for S3 storage. Provide it as a component input "
601
+ "or set AWS_SECRET_ACCESS_KEY environment variable."
602
+ )
543
603
  raise ValueError(msg)
544
- if not getattr(self, "bucket_name", None):
545
- msg = "S3 Bucket Name is required for S3 storage"
604
+ if not bucket_name:
605
+ msg = (
606
+ "S3 Bucket Name is required for S3 storage. Provide it as a component input "
607
+ "or set LANGFLOW_OBJECT_STORAGE_BUCKET_NAME environment variable."
608
+ )
546
609
  raise ValueError(msg)
547
610
 
548
- # Use S3 upload functionality
549
- try:
550
- import boto3
551
- except ImportError as e:
552
- msg = "boto3 is not installed. Please install it using `uv pip install boto3`."
553
- raise ImportError(msg) from e
611
+ # Validate AWS credentials
612
+ validate_aws_credentials(self)
554
613
 
555
614
  # Create S3 client
556
- client_config = {
557
- "aws_access_key_id": self.aws_access_key_id,
558
- "aws_secret_access_key": self.aws_secret_access_key,
615
+ s3_client = create_s3_client(self)
616
+ client_config: dict[str, Any] = {
617
+ "aws_access_key_id": str(aws_access_key_id),
618
+ "aws_secret_access_key": str(aws_secret_access_key),
559
619
  }
560
620
 
561
- if hasattr(self, "aws_region") and self.aws_region:
562
- client_config["region_name"] = self.aws_region
621
+ # Get region from component input, environment variable, or settings
622
+ aws_region = getattr(self, "aws_region", None)
623
+ if not aws_region:
624
+ aws_region = os.getenv("AWS_DEFAULT_REGION") or os.getenv("AWS_REGION")
625
+ if aws_region:
626
+ client_config["region_name"] = str(aws_region)
563
627
 
564
628
  s3_client = boto3.client("s3", **client_config)
565
629
 
@@ -575,14 +639,16 @@ class SaveToFileComponent(Component):
575
639
  # Create temporary file
576
640
  import tempfile
577
641
 
578
- with tempfile.NamedTemporaryFile(mode="w", suffix=f".{file_format}", delete=False) as temp_file:
642
+ with tempfile.NamedTemporaryFile(
643
+ mode="w", encoding="utf-8", suffix=f".{file_format}", delete=False
644
+ ) as temp_file:
579
645
  temp_file.write(content)
580
646
  temp_file_path = temp_file.name
581
647
 
582
648
  try:
583
649
  # Upload to S3
584
- s3_client.upload_file(temp_file_path, self.bucket_name, file_path)
585
- s3_url = f"s3://{self.bucket_name}/{file_path}"
650
+ s3_client.upload_file(temp_file_path, bucket_name, file_path)
651
+ s3_url = f"s3://{bucket_name}/{file_path}"
586
652
  return Message(text=f"File successfully uploaded to {s3_url}")
587
653
  finally:
588
654
  # Clean up temp file
@@ -591,6 +657,12 @@ class SaveToFileComponent(Component):
591
657
 
592
658
  async def _save_to_google_drive(self) -> Message:
593
659
  """Save file to Google Drive using Google Drive functionality."""
660
+ import tempfile
661
+
662
+ from googleapiclient.http import MediaFileUpload
663
+
664
+ from lfx.base.data.cloud_storage_utils import create_google_drive_service
665
+
594
666
  # Validate Google Drive credentials
595
667
  if not getattr(self, "service_account_key", None):
596
668
  msg = "GCP Credentials Secret Key is required for Google Drive storage"
@@ -599,30 +671,10 @@ class SaveToFileComponent(Component):
599
671
  msg = "Google Drive Folder ID is required for Google Drive storage"
600
672
  raise ValueError(msg)
601
673
 
602
- # Use Google Drive upload functionality
603
- try:
604
- import json
605
- import tempfile
606
-
607
- from google.oauth2 import service_account
608
- from googleapiclient.discovery import build
609
- from googleapiclient.http import MediaFileUpload
610
- except ImportError as e:
611
- msg = "Google API client libraries are not installed. Please install them."
612
- raise ImportError(msg) from e
613
-
614
- # Parse credentials
615
- try:
616
- credentials_dict = json.loads(self.service_account_key)
617
- except json.JSONDecodeError as e:
618
- msg = f"Invalid JSON in service account key: {e!s}"
619
- raise ValueError(msg) from e
620
-
621
- # Create Google Drive service
622
- credentials = service_account.Credentials.from_service_account_info(
623
- credentials_dict, scopes=["https://www.googleapis.com/auth/drive.file"]
674
+ # Create Google Drive service with full drive scope (needed for folder operations)
675
+ drive_service, credentials = create_google_drive_service(
676
+ self.service_account_key, scopes=["https://www.googleapis.com/auth/drive"], return_credentials=True
624
677
  )
625
- drive_service = build("drive", "v3", credentials=credentials)
626
678
 
627
679
  # Extract content and format
628
680
  content = self._extract_content_for_upload()
@@ -634,16 +686,34 @@ class SaveToFileComponent(Component):
634
686
 
635
687
  # Create temporary file
636
688
  file_path = f"{self.file_name}.{file_format}"
637
- with tempfile.NamedTemporaryFile(mode="w", suffix=f".{file_format}", delete=False) as temp_file:
689
+ with tempfile.NamedTemporaryFile(
690
+ mode="w",
691
+ encoding="utf-8",
692
+ suffix=f".{file_format}",
693
+ delete=False,
694
+ ) as temp_file:
638
695
  temp_file.write(content)
639
696
  temp_file_path = temp_file.name
640
697
 
641
698
  try:
642
699
  # Upload to Google Drive
700
+ # Note: We skip explicit folder verification since it requires broader permissions.
701
+ # If the folder doesn't exist or isn't accessible, the create() call will fail with a clear error.
643
702
  file_metadata = {"name": file_path, "parents": [self.folder_id]}
644
703
  media = MediaFileUpload(temp_file_path, resumable=True)
645
704
 
646
- uploaded_file = drive_service.files().create(body=file_metadata, media_body=media, fields="id").execute()
705
+ try:
706
+ uploaded_file = (
707
+ drive_service.files().create(body=file_metadata, media_body=media, fields="id").execute()
708
+ )
709
+ except Exception as e:
710
+ msg = (
711
+ f"Unable to upload file to Google Drive folder '{self.folder_id}'. "
712
+ f"Error: {e!s}. "
713
+ "Please ensure: 1) The folder ID is correct, 2) The folder exists, "
714
+ "3) The service account has been granted access to this folder."
715
+ )
716
+ raise ValueError(msg) from e
647
717
 
648
718
  file_id = uploaded_file.get("id")
649
719
  file_url = f"https://drive.google.com/file/d/{file_id}/view"
@@ -197,8 +197,14 @@ class WatsonxAIComponent(LCModelComponent):
197
197
  "logit_bias": logit_bias,
198
198
  }
199
199
 
200
+ # Pass API key as plain string to avoid SecretStr serialization issues
201
+ # when model is configured with with_config() or used in batch operations
202
+ api_key_value = self.api_key
203
+ if isinstance(api_key_value, SecretStr):
204
+ api_key_value = api_key_value.get_secret_value()
205
+
200
206
  return ChatWatsonx(
201
- apikey=SecretStr(self.api_key).get_secret_value(),
207
+ apikey=api_key_value,
202
208
  url=self.base_url,
203
209
  project_id=self.project_id,
204
210
  model_id=self.model_name,
@@ -121,13 +121,19 @@ class ChatOutput(ChatComponent):
121
121
  message = self.input_value
122
122
  # Update message properties
123
123
  message.text = text
124
+ # Preserve existing session_id from the incoming message if it exists
125
+ existing_session_id = message.session_id
124
126
  else:
125
127
  message = Message(text=text)
128
+ existing_session_id = None
126
129
 
127
130
  # Set message properties
128
131
  message.sender = self.sender
129
132
  message.sender_name = self.sender_name
130
- message.session_id = self.session_id or self.graph.session_id or ""
133
+ # Preserve session_id from incoming message, or use component/graph session_id
134
+ message.session_id = (
135
+ self.session_id or existing_session_id or (self.graph.session_id if hasattr(self, "graph") else None) or ""
136
+ )
131
137
  message.context_id = self.context_id
132
138
  message.flow_id = self.graph.flow_id if hasattr(self, "graph") else None
133
139
  message.properties.source = self._build_source(source_id, display_name, source)
@@ -44,12 +44,20 @@ class ToolCallingAgentComponent(LCToolsAgentComponent):
44
44
  return self.chat_history
45
45
 
46
46
  def create_agent_runnable(self):
47
- messages = [
48
- ("system", "{system_prompt}"),
49
- ("placeholder", "{chat_history}"),
50
- ("human", "{input}"),
51
- ("placeholder", "{agent_scratchpad}"),
52
- ]
47
+ messages = []
48
+
49
+ # Only include system message if system_prompt is provided and not empty
50
+ if hasattr(self, "system_prompt") and self.system_prompt and self.system_prompt.strip():
51
+ messages.append(("system", "{system_prompt}"))
52
+
53
+ messages.extend(
54
+ [
55
+ ("placeholder", "{chat_history}"),
56
+ ("human", "{input}"),
57
+ ("placeholder", "{agent_scratchpad}"),
58
+ ]
59
+ )
60
+
53
61
  prompt = ChatPromptTemplate.from_messages(messages)
54
62
  self.validate_tool_names()
55
63
  try:
@@ -4,8 +4,13 @@ from typing import TYPE_CHECKING, Any, cast
4
4
 
5
5
  import toml # type: ignore[import-untyped]
6
6
 
7
+ from lfx.base.models.unified_models import (
8
+ get_language_model_options,
9
+ get_model_classes,
10
+ update_model_options_in_build_config,
11
+ )
7
12
  from lfx.custom.custom_component.component import Component
8
- from lfx.io import BoolInput, DataFrameInput, HandleInput, MessageTextInput, MultilineInput, Output
13
+ from lfx.io import BoolInput, DataFrameInput, MessageTextInput, ModelInput, MultilineInput, Output, SecretStrInput
9
14
  from lfx.log.logger import logger
10
15
  from lfx.schema.dataframe import DataFrame
11
16
 
@@ -20,13 +25,20 @@ class BatchRunComponent(Component):
20
25
  icon = "List"
21
26
 
22
27
  inputs = [
23
- HandleInput(
28
+ ModelInput(
24
29
  name="model",
25
30
  display_name="Language Model",
26
- info="Connect the 'Language Model' output from your LLM component here.",
27
- input_types=["LanguageModel"],
31
+ info="Select your model provider",
32
+ real_time_refresh=True,
28
33
  required=True,
29
34
  ),
35
+ SecretStrInput(
36
+ name="api_key",
37
+ display_name="API Key",
38
+ info="Model Provider API key",
39
+ real_time_refresh=True,
40
+ advanced=True,
41
+ ),
30
42
  MultilineInput(
31
43
  name="system_message",
32
44
  display_name="Instructions",
@@ -76,6 +88,17 @@ class BatchRunComponent(Component):
76
88
  ),
77
89
  ]
78
90
 
91
+ def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):
92
+ """Dynamically update build config with user-filtered model options."""
93
+ return update_model_options_in_build_config(
94
+ component=self,
95
+ build_config=build_config,
96
+ cache_key_prefix="language_model_options",
97
+ get_options_func=get_language_model_options,
98
+ field_name=field_name,
99
+ field_value=field_value,
100
+ )
101
+
79
102
  def _format_row_as_toml(self, row: dict[str, Any]) -> str:
80
103
  """Convert a dictionary (row) into a TOML-formatted string."""
81
104
  formatted_dict = {str(col): {"value": str(val)} for col, val in row.items()}
@@ -111,20 +134,43 @@ class BatchRunComponent(Component):
111
134
  }
112
135
 
113
136
  async def run_batch(self) -> DataFrame:
114
- """Process each row in df[column_name] with the language model asynchronously.
115
-
116
- Returns:
117
- DataFrame: A new DataFrame containing:
118
- - All original columns
119
- - The model's response column (customizable name)
120
- - 'batch_index' column for processing order
121
- - 'metadata' (optional)
122
-
123
- Raises:
124
- ValueError: If the specified column is not found in the DataFrame
125
- TypeError: If the model is not compatible or input types are wrong
126
- """
127
- model: Runnable = self.model
137
+ """Process each row in df[column_name] with the language model asynchronously."""
138
+ # Check if model is already an instance (for testing) or needs to be instantiated
139
+ if isinstance(self.model, list):
140
+ # Extract model configuration
141
+ model_selection = self.model[0]
142
+ model_name = model_selection.get("name")
143
+ provider = model_selection.get("provider")
144
+ metadata = model_selection.get("metadata", {})
145
+
146
+ # Get model class and parameters from metadata
147
+ model_class = get_model_classes().get(metadata.get("model_class"))
148
+ if model_class is None:
149
+ msg = f"No model class defined for {model_name}"
150
+ raise ValueError(msg)
151
+
152
+ api_key_param = metadata.get("api_key_param", "api_key")
153
+ model_name_param = metadata.get("model_name_param", "model")
154
+
155
+ # Get API key from global variables
156
+ from lfx.base.models.unified_models import get_api_key_for_provider
157
+
158
+ api_key = get_api_key_for_provider(self.user_id, provider, self.api_key)
159
+
160
+ if not api_key and provider != "Ollama":
161
+ msg = f"{provider} API key is required. Please configure it globally."
162
+ raise ValueError(msg)
163
+
164
+ # Instantiate the model
165
+ kwargs = {
166
+ model_name_param: model_name,
167
+ api_key_param: api_key,
168
+ }
169
+ model: Runnable = model_class(**kwargs)
170
+ else:
171
+ # Model is already an instance (typically in tests)
172
+ model = self.model
173
+
128
174
  system_msg = self.system_message or ""
129
175
  df: DataFrame = self.df
130
176
  col_name = self.column_name or ""
@@ -159,13 +205,22 @@ class BatchRunComponent(Component):
159
205
  ]
160
206
 
161
207
  # Configure the model with project info and callbacks
162
- model = model.with_config(
163
- {
164
- "run_name": self.display_name,
165
- "project_name": self.get_project_name(),
166
- "callbacks": self.get_langchain_callbacks(),
167
- }
168
- )
208
+ # Some models (e.g., ChatWatsonx) may have serialization issues with with_config()
209
+ # due to SecretStr or other non-serializable attributes
210
+ try:
211
+ model = model.with_config(
212
+ {
213
+ "run_name": self.display_name,
214
+ "project_name": self.get_project_name(),
215
+ "callbacks": self.get_langchain_callbacks(),
216
+ }
217
+ )
218
+ except (TypeError, ValueError, AttributeError) as e:
219
+ # Log warning and continue without configuration
220
+ await logger.awarning(
221
+ f"Could not configure model with callbacks and project info: {e!s}. "
222
+ "Proceeding with batch processing without configuration."
223
+ )
169
224
  # Process batches and track progress
170
225
  responses_with_idx = list(
171
226
  zip(
@@ -4,14 +4,23 @@ import json
4
4
  import re
5
5
  from typing import TYPE_CHECKING, Any
6
6
 
7
+ from lfx.base.models.unified_models import (
8
+ get_language_model_options,
9
+ get_llm,
10
+ update_model_options_in_build_config,
11
+ )
7
12
  from lfx.custom.custom_component.component import Component
8
- from lfx.io import DataInput, HandleInput, IntInput, MultilineInput, Output
13
+ from lfx.io import DataInput, IntInput, ModelInput, MultilineInput, Output, SecretStrInput
9
14
  from lfx.schema.data import Data
10
15
  from lfx.schema.dataframe import DataFrame
11
16
 
12
17
  if TYPE_CHECKING:
13
18
  from collections.abc import Callable
14
19
 
20
+ # # Compute model options once at module level
21
+ # _MODEL_OPTIONS = get_language_model_options()
22
+ # _PROVIDERS = [provider["provider"] for provider in _MODEL_OPTIONS]
23
+
15
24
 
16
25
  class LambdaFilterComponent(Component):
17
26
  display_name = "Smart Transform"
@@ -29,13 +38,20 @@ class LambdaFilterComponent(Component):
29
38
  is_list=True,
30
39
  required=True,
31
40
  ),
32
- HandleInput(
33
- name="llm",
41
+ ModelInput(
42
+ name="model",
34
43
  display_name="Language Model",
35
- info="Connect the 'Language Model' output from your LLM component here.",
36
- input_types=["LanguageModel"],
44
+ info="Select your model provider",
45
+ real_time_refresh=True,
37
46
  required=True,
38
47
  ),
48
+ SecretStrInput(
49
+ name="api_key",
50
+ display_name="API Key",
51
+ info="Model Provider API key",
52
+ real_time_refresh=True,
53
+ advanced=True,
54
+ ),
39
55
  MultilineInput(
40
56
  name="filter_instruction",
41
57
  display_name="Instructions",
@@ -75,6 +91,17 @@ class LambdaFilterComponent(Component):
75
91
  ),
76
92
  ]
77
93
 
94
+ def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):
95
+ """Dynamically update build config with user-filtered model options."""
96
+ return update_model_options_in_build_config(
97
+ component=self,
98
+ build_config=build_config,
99
+ cache_key_prefix="language_model_options",
100
+ get_options_func=get_language_model_options,
101
+ field_name=field_name,
102
+ field_value=field_value,
103
+ )
104
+
78
105
  def get_data_structure(self, data):
79
106
  """Extract the structure of data, replacing values with their types."""
80
107
  if isinstance(data, list):
@@ -129,7 +156,7 @@ class LambdaFilterComponent(Component):
129
156
  dump = json.dumps(data)
130
157
  self.log(str(data))
131
158
 
132
- llm = self.llm
159
+ llm = get_llm(model=self.model, user_id=self.user_id, api_key=self.api_key)
133
160
  instruction = self.filter_instruction
134
161
  sample_size = self.sample_size
135
162