lfx-nightly 0.1.12.dev39__py3-none-any.whl → 0.1.12.dev41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lfx-nightly might be problematic. Click here for more details.

Files changed (41)
  1. lfx/_assets/component_index.json +1 -1
  2. lfx/base/composio/composio_base.py +383 -42
  3. lfx/base/models/google_generative_ai_model.py +38 -0
  4. lfx/components/agents/__init__.py +3 -1
  5. lfx/components/agents/cuga_agent.py +995 -0
  6. lfx/components/arxiv/arxiv.py +8 -2
  7. lfx/components/composio/__init__.py +71 -17
  8. lfx/components/composio/agentql_composio.py +11 -0
  9. lfx/components/composio/agiled_composio.py +11 -0
  10. lfx/components/composio/bolna_composio.py +11 -0
  11. lfx/components/composio/brightdata_composio.py +11 -0
  12. lfx/components/composio/canvas_composio.py +11 -0
  13. lfx/components/composio/digicert_composio.py +11 -0
  14. lfx/components/composio/finage_composio.py +11 -0
  15. lfx/components/composio/fixer_composio.py +11 -0
  16. lfx/components/composio/flexisign_composio.py +11 -0
  17. lfx/components/composio/freshdesk_composio.py +11 -0
  18. lfx/components/composio/googleclassroom_composio.py +11 -0
  19. lfx/components/composio/instagram_composio.py +11 -0
  20. lfx/components/composio/jira_composio.py +11 -0
  21. lfx/components/composio/jotform_composio.py +11 -0
  22. lfx/components/composio/listennotes_composio.py +11 -0
  23. lfx/components/composio/missive_composio.py +11 -0
  24. lfx/components/composio/pandadoc_composio.py +11 -0
  25. lfx/components/composio/slack_composio.py +573 -2
  26. lfx/components/composio/timelinesai_composio.py +11 -0
  27. lfx/components/datastax/astra_db.py +1 -0
  28. lfx/components/datastax/astradb_cql.py +1 -1
  29. lfx/components/datastax/astradb_graph.py +1 -0
  30. lfx/components/datastax/astradb_tool.py +1 -1
  31. lfx/components/datastax/astradb_vectorstore.py +1 -1
  32. lfx/components/datastax/hcd.py +1 -0
  33. lfx/components/google/google_generative_ai.py +4 -7
  34. lfx/components/logic/__init__.py +3 -0
  35. lfx/components/logic/llm_conditional_router.py +65 -21
  36. lfx/components/models/language_model.py +2 -2
  37. lfx/components/processing/lambda_filter.py +82 -18
  38. {lfx_nightly-0.1.12.dev39.dist-info → lfx_nightly-0.1.12.dev41.dist-info}/METADATA +1 -1
  39. {lfx_nightly-0.1.12.dev39.dist-info → lfx_nightly-0.1.12.dev41.dist-info}/RECORD +41 -21
  40. {lfx_nightly-0.1.12.dev39.dist-info → lfx_nightly-0.1.12.dev41.dist-info}/WHEEL +0 -0
  41. {lfx_nightly-0.1.12.dev39.dist-info → lfx_nightly-0.1.12.dev41.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,995 @@
1
+ import asyncio
2
+ import json
3
+ import os
4
+ import re
5
+ import traceback
6
+ import uuid
7
+ from collections.abc import AsyncIterator
8
+ from typing import TYPE_CHECKING, Any, cast
9
+
10
+ from langchain_core.agents import AgentFinish
11
+ from langchain_core.messages import AIMessage, HumanMessage
12
+ from langchain_core.tools import StructuredTool
13
+ from langflow.field_typing import Tool
14
+ from langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput
15
+
16
+ # from langflow.logging import logger
17
+ from langflow.schema.data import Data
18
+ from langflow.schema.dotdict import dotdict
19
+ from langflow.schema.message import Message
20
+ from langflow.schema.table import EditMode
21
+ from pydantic import ValidationError
22
+
23
+ from lfx.base.agents.events import ExceptionWithMessageError
24
+ from lfx.base.models.model_input_constants import (
25
+ ALL_PROVIDER_FIELDS,
26
+ MODEL_DYNAMIC_UPDATE_FIELDS,
27
+ MODEL_PROVIDERS,
28
+ MODEL_PROVIDERS_DICT,
29
+ MODELS_METADATA,
30
+ )
31
+ from lfx.base.models.model_utils import get_model_name
32
+ from lfx.components.agents.agent import LCToolsAgentComponent
33
+ from lfx.components.helpers.current_date import CurrentDateComponent
34
+ from lfx.components.helpers.memory import MemoryComponent
35
+ from lfx.components.langchain_utilities.tool_calling import ToolCallingAgentComponent
36
+ from lfx.custom.custom_component.component import _get_component_toolkit
37
+ from lfx.custom.utils import update_component_build_config
38
+ from lfx.helpers.base_model import build_model_from_schema
39
+ from lfx.log.logger import logger
40
+
41
+ if TYPE_CHECKING:
42
+ from langflow.schema.log import SendMessageFunctionType
43
+
44
+
45
def set_advanced_true(component_input):
    """Flag a component input as advanced and hand the same object back.

    The input is modified in place; no copy is made.

    Args:
        component_input: Input object whose ``advanced`` attribute is set.

    Returns:
        The object that was passed in, now with ``advanced`` equal to True.
    """
    flagged = component_input
    flagged.advanced = True
    return flagged
56
+
57
+
58
# Providers offered in the CugaComponent "Model Provider" dropdown (a "Custom" option is appended there).
MODEL_PROVIDERS_LIST = ["OpenAI"]
59
+
60
+
61
class CugaComponent(ToolCallingAgentComponent):
    """Cuga Agent Component for advanced AI task execution.

    The Cuga component is an advanced AI agent that can execute complex tasks using
    various tools, browser automation, and structured output generation. It supports
    custom policies, web applications, and API interactions.

    Attributes:
        display_name: Human-readable name for the component
        description: Brief description of the component's purpose
        documentation: URL to component documentation
        icon: Icon identifier for the UI
        beta: Whether the component is in beta status
        name: Internal component name
    """

    display_name: str = "Cuga"
    description: str = "Define the Cuga agent's policies, then assign it a task."
    documentation: str = "https://docs.langflow.org/agents"
    icon = "bot"
    beta = True
    name = "Cuga"

    # MemoryComponent inputs, each flagged as advanced (set_advanced_true mutates the input objects).
    # Not referenced by `inputs` below.
    memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]

    # Filter out json_mode from OpenAI inputs since we handle structured output differently
    openai_inputs_filtered = [
        input_field
        for input_field in MODEL_PROVIDERS_DICT["OpenAI"]["inputs"]
        if not (hasattr(input_field, "name") and input_field.name == "json_mode")
    ]

    inputs = [
        # Provider selector: every entry of MODEL_PROVIDERS_LIST plus "Custom".
        DropdownInput(
            name="agent_llm",
            display_name="Model Provider",
            info="The provider of the language model that the agent will use to generate responses.",
            options=[*MODEL_PROVIDERS_LIST, "Custom"],
            value="OpenAI",
            real_time_refresh=True,
            input_types=[],
            # One metadata entry per provider, followed by the entry for "Custom".
            options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{"icon": "brain"}],
        ),
        *openai_inputs_filtered,
        # Free-text policies; call_agent passes them to the Cuga InstructionsManager when non-empty.
        MultilineInput(
            name="policies",
            display_name="Policies",
            info=(
                "Custom instructions or policies for the agent to adhere to during its operation.\n"
                "Example:\n"
                "# Plan\n"
                "< planning instructions e.g. which tools and when to use>\n"
                "# Answer\n"
                "< final answer instructions how to answer>"
            ),
            value="",
            advanced=False,
        ),
        # Read by get_memory_data as the n_messages limit for chat-history retrieval.
        IntInput(
            name="n_messages",
            display_name="Number of Chat History Messages",
            value=100,
            info="Number of chat history messages to retrieve.",
            advanced=True,
            show=True,
        ),
        # Prepended to the structured-output prompt by json_response.
        MultilineInput(
            name="format_instructions",
            display_name="Output Format Instructions",
            info="Generic Template for structured output formatting. Valid only with Structured response.",
            value=(
                "You are an AI that extracts structured JSON objects from unstructured text. "
                "Use a predefined schema with expected types (str, int, float, bool, dict). "
                "Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. "
                "Fill missing or ambiguous values with defaults: null for missing values. "
                "Remove exact duplicates but keep variations that have different field values. "
                "Always return valid JSON in the expected format, never throw errors. "
                "If multiple objects can be extracted, return them all in the structured format."
            ),
            advanced=True,
        ),
        # Rows of this table are normalised by _preprocess_schema before a model is built from them.
        TableInput(
            name="output_schema",
            display_name="Output Schema",
            info=(
                "Schema Validation: Define the structure and data types for structured output. "
                "No validation if no output schema."
            ),
            advanced=True,
            required=False,
            value=[],
            table_schema=[
                {
                    "name": "name",
                    "display_name": "Name",
                    "type": "str",
                    "description": "Specify the name of the output field.",
                    "default": "field",
                    "edit_mode": EditMode.INLINE,
                },
                {
                    "name": "description",
                    "display_name": "Description",
                    "type": "str",
                    "description": "Describe the purpose of the output field.",
                    "default": "description of field",
                    "edit_mode": EditMode.POPOVER,
                },
                {
                    "name": "type",
                    "display_name": "Type",
                    "type": "str",
                    "edit_mode": EditMode.INLINE,
                    "description": ("Indicate the data type of the output field (e.g., str, int, float, bool, dict)."),
                    "options": ["str", "int", "float", "bool", "dict"],
                    "default": "str",
                },
                {
                    "name": "multiple",
                    "display_name": "As List",
                    "type": "boolean",
                    "description": "Set to True if this output field should be a list of the specified type.",
                    # The default is the string "False"; _preprocess_schema converts string values to bool.
                    "default": "False",
                    "edit_mode": EditMode.INLINE,
                },
            ],
        ),
        *LCToolsAgentComponent._base_inputs,
        # When true, get_agent_requirements adds a CurrentDateComponent tool to the tool list.
        BoolInput(
            name="add_current_date_tool",
            display_name="Current Date",
            advanced=True,
            info="If true, will add a tool to the agent that returns the current date.",
            value=True,
        ),
        # Selects the "hybrid" (browser) or "api" mode in call_agent.
        BoolInput(
            name="browser_enabled",
            display_name="Enable Browser",
            info="Toggle to enable a built-in browser tool for web scraping and searching.",
            value=False,
            advanced=False,
        ),
        # Used (stripped) as the start URL when the browser is enabled.
        MultilineInput(
            name="web_apps",
            display_name="Web applications",
            info=(
                "Define a list of web applications that cuga will open when enable browser is true. "
                "Currently only supports one web application. Example: https://example.com"
            ),
            value="",
            advanced=False,
        ),
        # NOTE(review): no visible method reads this input — confirm where it is consumed.
        BoolInput(
            name="API",
            display_name="Enable API Sub-agent",
            info="Toggle to enable a built-in sub-agent specialized for API interactions.",
            value=False,
            advanced=True,
        ),
    ]
    # Each output is produced by the method named in `method`.
    outputs = [
        Output(name="response", display_name="Response", method="message_response"),
        Output(name="structured_response", display_name="Structured Response", method="json_response", tool_mode=False),
    ]
225
+
226
async def call_agent(
    self, current_input: str, tools: list[Tool], history_messages: list[Message], llm
) -> AsyncIterator[dict[str, Any]]:
    """Run the Cuga agent on ``current_input`` and stream its progress as event dicts.

    The generator first yields a ``CUGA_initializing`` ``on_chain_start`` event. When
    ``current_input`` is non-empty it then configures the Cuga runtime through
    ``DYNACONF_*`` environment variables (a process-wide side effect), initialises
    the agent, yields a ``CUGA_thinking...`` ``on_chain_start`` event and relays the
    agent's own events:

    * every ``StreamEvent`` becomes an ``on_tool_start`` event; the matching
      ``on_tool_end`` is emitted when the next event arrives, carrying the step's data;
    * an ``ExperimentResult`` becomes an ``on_chain_end`` event whose output is an
      ``AgentFinish`` holding the answer.

    Args:
        current_input: The user input (goal) to process. An empty value ends the
            stream right after the initial event.
        tools: Tools registered with the Cuga activity tracker.
        history_messages: Previous conversation history. Accepted for interface
            compatibility; it is not forwarded to the agent.
        llm: The language model instance handed to the Cuga LLM manager.

    Yields:
        dict: Events with ``event``, ``run_id``, ``name`` and ``data`` keys.

    Note:
        ``ValueError``, ``TypeError``, ``RuntimeError`` and ``ConnectionError`` raised
        while the agent runs are logged and reported as an ``on_chain_error`` event
        instead of propagating. Errors raised during initialisation propagate.
    """
    yield {
        "event": "on_chain_start",
        "run_id": str(uuid.uuid4()),
        "name": "CUGA_initializing",
        "data": {"input": {"input": current_input, "chat_history": []}},
    }
    logger.debug(f"LLM MODEL TYPE: {type(llm)}")
    if not current_input:
        return

    # Cuga reads its configuration from these variables; they are set process-wide.
    os.environ["DYNACONF_ADVANCED_FEATURES__REGISTRY"] = "false"
    if self.browser_enabled:
        logger.info("browser_enabled is true, setting env to hybrid")
        os.environ["DYNACONF_ADVANCED_FEATURES__MODE"] = "hybrid"
        os.environ["DYNACONF_ADVANCED_FEATURES__USE_VISION"] = "false"
    else:
        logger.info("browser_enabled is false, setting env to api")
        os.environ["DYNACONF_ADVANCED_FEATURES__MODE"] = "api"

    # Imported only after the environment is configured, as in the original code.
    from cuga.backend.activity_tracker.tracker import ActivityTracker
    from cuga.backend.cuga_graph.utils.agent_loop import StreamEvent
    from cuga.backend.cuga_graph.utils.controller import (
        AgentRunner as CugaAgent,
    )
    from cuga.backend.cuga_graph.utils.controller import (
        ExperimentResult as AgentResult,
    )
    from cuga.backend.llm.models import LLMManager
    from cuga.configurations.instructions_manager import InstructionsManager

    # NOTE(review): the managers below are presumably shared singletons inside cuga,
    # since their instances are not passed to the agent explicitly — confirm.
    llm_manager = LLMManager()
    llm_manager.set_llm(llm)
    instructions_manager = InstructionsManager()
    if self.policies:
        logger.info(f"policies are: {self.policies}")
        instructions_manager.set_instructions_from_one_file(self.policies)
    tracker = ActivityTracker()
    tracker.set_tools(tools)

    cuga_agent = CugaAgent(browser_enabled=self.browser_enabled)
    if self.browser_enabled:
        await cuga_agent.initialize_freemode_env(start_url=self.web_apps.strip(), interface_mode="browser_only")
    else:
        await cuga_agent.initialize_appworld_env()

    yield {
        "event": "on_chain_start",
        "run_id": str(uuid.uuid4()),
        "name": "CUGA_thinking...",
        "data": {"input": {"input": current_input, "chat_history": []}},
    }
    logger.info(f"[CUGA] current web apps are {self.web_apps}")
    logger.info(f"[CUGA] Processing input: {current_input}")
    try:
        # NOTE(review): fixed pause kept from the original implementation; its purpose
        # is not visible here — confirm whether anything depends on it.
        await asyncio.sleep(0.5)

        # The step whose on_tool_start was emitted but whose on_tool_end is still pending.
        last_event: StreamEvent | None = None
        tool_run_id: str | None = None

        async for event in cuga_agent.run_task_generic_yield(eval_mode=False, goal=current_input):
            logger.debug(f"recieved event {event}")

            # Close the previous step before handling the new event.
            if last_event is not None and tool_run_id is not None:
                logger.debug(f"last event {last_event}")
                try:
                    step_output = json.loads(last_event.data)
                except (json.JSONDecodeError, TypeError):
                    # Not JSON text (or not text at all): pass the raw data through.
                    step_output = last_event.data
                if last_event.name == "CodeAgent" and isinstance(step_output, dict) and "code" in step_output:
                    step_output = step_output["code"]
                yield {
                    "event": "on_tool_end",
                    "run_id": tool_run_id,
                    "name": last_event.name,
                    "data": {"output": step_output},
                }
                # Cleared so the same step is never closed twice.
                last_event = None
                tool_run_id = None

            if isinstance(event, StreamEvent):
                tool_run_id = str(uuid.uuid4())
                last_event = StreamEvent(name=event.name, data=event.data)
                logger.debug(f"[CUGA] Yielding tool_start event: {event.name}")
                yield {
                    "event": "on_tool_start",
                    "run_id": tool_run_id,
                    "name": event.name,
                    "data": {"input": {}},
                }

            if isinstance(event, AgentResult):
                answer_preview = event.answer[:100] if event.answer else "None"
                logger.info(f"[CUGA] Yielding chain_end event with answer: {answer_preview}...")
                yield {
                    "event": "on_chain_end",
                    "run_id": str(uuid.uuid4()),
                    "name": "CugaAgent",
                    "data": {"output": AgentFinish(return_values={"output": event.answer}, log="")},
                }

    except (ValueError, TypeError, RuntimeError, ConnectionError) as e:
        logger.error(f"An error occurred: {e!s}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        error_msg = f"CUGA Agent error: {e!s}"
        logger.error(f"[CUGA] Error occurred: {error_msg}")

        # Report the failure to the consumer as an event instead of raising.
        yield {
            "event": "on_chain_error",
            "run_id": str(uuid.uuid4()),
            "name": "CugaAgent",
            "data": {"error": error_msg},
        }
398
+
399
async def message_response(self) -> Message:
    """Run the Cuga agent on the component input and return the resulting message.

    The method gathers the model, chat history and tools, streams the events of
    ``call_agent`` through ``process_agent_events`` and stores the outcome on
    ``self._agent_result``. Every message update is sent with ``skip_db_update=False``.

    Returns:
        Message: The agent's response message.

    Raises:
        ValueError: If the input is empty, or if the failure text mentions
            "playwright install" (re-raised with installation instructions).
        Exception: Any other error raised during the run is logged and re-raised.
    """
    logger.info("[CUGA] Starting Cuga agent run for message_response.")
    logger.info(f"[CUGA] Agent input value: {self.input_value}")

    # Reject missing or whitespace-only input up front.
    has_text = bool(self.input_value) and bool(str(self.input_value).strip())
    if not has_text:
        msg = "Message cannot be empty. Please provide a valid message."
        raise ValueError(msg)

    try:
        llm_model, self.chat_history, self.tools = await self.get_agent_requirements()

        from lfx.schema.content_block import ContentBlock
        from lfx.schema.message import MESSAGE_SENDER_AI

        # Placeholder message that the event processor fills in step by step.
        steps_block = ContentBlock(title="Agent Steps", contents=[])
        agent_message = Message(
            sender=MESSAGE_SENDER_AI,
            sender_name="Cuga",
            properties={"icon": "Bot", "state": "partial"},
            content_blocks=[steps_block],
            session_id=self.graph.session_id,
        )

        if hasattr(self.input_value, "text"):
            input_text = self.input_value.text
        else:
            input_text = str(self.input_value)

        event_iterator = self.call_agent(
            current_input=input_text,
            tools=self.tools or [],
            history_messages=self.chat_history,
            llm=llm_model,
        )

        from lfx.base.agents.events import process_agent_events

        async def persisting_send_message(message, id_=None, *, skip_db_update=False):  # noqa: ARG001
            # The caller's skip_db_update is deliberately ignored: every update is
            # persisted so a polling UI can show the loading steps as they happen.
            step_count = len(message.content_blocks[0].contents) if message.content_blocks else 0
            logger.debug(
                f"[CUGA] Sending message update - state: {message.properties.state}, "
                f"content_blocks: {step_count}"
            )
            stored = await self.send_message(message, id_=id_, skip_db_update=False)
            logger.debug(f"[CUGA] Message saved to DB with ID: {stored.id if stored else 'None'}")
            return stored

        result = await process_agent_events(
            event_iterator, agent_message, cast("SendMessageFunctionType", persisting_send_message)
        )

        logger.info("[CUGA] Agent run finished successfully.")
        logger.info(f"[CUGA] Agent output: {result}")

        # Kept on the instance for potential JSON output.
        self._agent_result = result
        return result

    except Exception as exc:
        logger.error(f"[CUGA] Error in message_response: {exc}")
        logger.error(f"An error occurred: {exc!s}")
        logger.error(f"Traceback: {traceback.format_exc()}")

        # A missing Playwright browser gets a dedicated, actionable error.
        if "playwright install" in str(exc).lower():
            msg = (
                "Playwright is not installed. Please install Playwright Chromium using: "
                "uv run -m playwright install chromium"
            )
            raise ValueError(msg) from exc

        raise
485
+
486
async def get_agent_requirements(self):
    """Collect the language model, chat history and tools the agent needs.

    Besides returning them, the method stores the results on the component:
    ``self.model_name``, ``self.chat_history`` and ``self.tools``.

    When ``add_current_date_tool`` is enabled, a current-date tool is added to
    ``self.tools``. Both output methods call this method on the same component, so
    the tool is only appended if no tool with the same name is already present.

    Returns:
        tuple: ``(llm_model, chat_history, tools)``.

    Raises:
        ValueError: If no language model is selected.
        TypeError: If the current-date component does not yield a ``StructuredTool``.
    """
    llm_model, display_name = await self.get_llm()
    if llm_model is None:
        msg = "No language model selected. Please choose a model to proceed."
        raise ValueError(msg)
    self.model_name = get_model_name(llm_model, display_name=display_name)

    # Chat history is always handled as a list, even when a single message comes back.
    self.chat_history = await self.get_memory_data()
    if isinstance(self.chat_history, Message):
        self.chat_history = [self.chat_history]

    # Guarantee an iterable tool collection before it is extended or logged.
    # A non-list value is replaced when the date tool must be appended (original
    # behaviour) or when it is empty/None (which would break the logging below).
    if not isinstance(self.tools, list) and (self.add_current_date_tool or not self.tools):
        self.tools = []

    if self.add_current_date_tool:
        current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)
        if not isinstance(current_date_tool, StructuredTool):
            msg = "CurrentDateComponent must be converted to a StructuredTool"
            raise TypeError(msg)
        # Avoid registering the same tool twice when this method runs more than once.
        already_present = any(getattr(tool, "name", None) == current_date_tool.name for tool in self.tools)
        if not already_present:
            self.tools.append(current_date_tool)

    logger.info("[CUGA] Retrieved agent requirements: LLM, chat history, and tools.")
    logger.info(f"[CUGA] LLM model: {self.model_name}")
    logger.info(f"[CUGA] Number of chat history messages: {len(self.chat_history)}")
    logger.info(f"[CUGA] Tools available: {[tool.name for tool in self.tools]}")
    logger.info(f"[CUGA] metadata: {[tool.metadata for tool in self.tools]}")

    return llm_model, self.chat_history, self.tools
529
+
530
def _preprocess_schema(self, schema):
    """Normalise output-schema rows for ``build_model_from_schema``.

    Each row is reduced to the keys ``name``, ``type``, ``description`` and
    ``multiple``. The first three are coerced to ``str`` (defaults: ``"field"``,
    ``"str"``, ``""``). A string ``multiple`` is converted to a bool, treating
    "true", "1", "t", "y" and "yes" (case-insensitive) as True; other values of
    ``multiple`` are passed through unchanged (default ``False``).

    Args:
        schema: Iterable of mapping-like schema rows.

    Returns:
        list: One normalised dict per input row, in the same order.
    """
    truthy_words = ["true", "1", "t", "y", "yes"]

    def normalise(row):
        as_list = row.get("multiple", False)
        if isinstance(as_list, str):
            # Table cells may deliver the flag as text such as "False".
            as_list = as_list.lower() in truthy_words
        return {
            "name": str(row.get("name", "field")),
            "type": str(row.get("type", "str")),
            "description": str(row.get("description", "")),
            "multiple": as_list,
        }

    return [normalise(row) for row in schema]
555
+
556
async def build_structured_output_base(self, content: str):
    """Parse JSON out of the agent response and optionally validate it against the schema.

    Parsing strategy:
        1. Try to parse the whole ``content`` as JSON.
        2. Otherwise extract the outermost ``{...}`` span and parse it.
        3. Otherwise (or if that span is not valid JSON) extract the outermost
           ``[...]`` span and parse it, so arrays embedded in prose are supported.

    If nothing can be parsed, a dict with the original ``content`` and an ``error``
    hint is returned. Without an ``output_schema`` the parsed JSON is returned as-is.
    With a schema, each object is validated with a model built from it; objects that
    fail validation are returned as ``{"data": ..., "validation_error": ...}``.

    Args:
        content: The raw text content from the agent response.

    Returns:
        dict or list: Parsed (and, with a schema, validated) JSON data. With a schema
        the result is always a list.

    Raises:
        TypeError: If ``content`` is not a str/bytes value accepted by ``json.loads``.
    """
    logger.info(f"[CUGA] Attempting to build structured output from content: {content}")

    schema_error_msg = "Try setting an output schema"
    # Tried in order: the object pattern first (original behaviour), then arrays.
    json_patterns = (r"\{.*\}", r"\[.*\]")

    json_data = None
    try:
        json_data = json.loads(content)
    except json.JSONDecodeError:
        candidate_found = False
        parsed = False
        for pattern in json_patterns:
            json_match = re.search(pattern, content, re.DOTALL)
            if not json_match:
                continue
            candidate_found = True
            try:
                json_data = json.loads(json_match.group())
            except json.JSONDecodeError:
                continue
            parsed = True
            break

        if not parsed:
            if candidate_found:
                logger.warning("[CUGA] Could not parse content as JSON even with regex match.")
            else:
                logger.warning("[CUGA] No JSON pattern found in the content.")
            return {"content": content, "error": schema_error_msg}

    # Without a schema there is nothing to validate against.
    if not getattr(self, "output_schema", None):
        logger.info("[CUGA] No output schema provided. Returning parsed JSON without validation.")
        return json_data

    try:
        logger.info("[CUGA] Output schema detected. Validating structured output against schema.")
        processed_schema = self._preprocess_schema(self.output_schema)
        output_model = build_model_from_schema(processed_schema)

        if isinstance(json_data, list):
            # Multiple objects: validate each one independently so a single bad
            # item does not discard the rest.
            validated_objects = []
            for item in json_data:
                try:
                    validated_obj = output_model.model_validate(item)
                    validated_objects.append(validated_obj.model_dump())
                except ValidationError as e:
                    await logger.aerror(f"[CUGA] Validation error for item: {e}")
                    validated_objects.append({"data": item, "validation_error": str(e)})
            return validated_objects

        # Single object: still returned inside a list for a uniform shape.
        try:
            validated_obj = output_model.model_validate(json_data)
            return [validated_obj.model_dump()]
        except ValidationError as e:
            await logger.aerror(f"[CUGA] Validation error: {e}")
            return [{"data": json_data, "validation_error": str(e)}]

    except (TypeError, ValueError) as e:
        # Building the model failed; fall back to the unvalidated JSON.
        await logger.aerror(f"[CUGA] Error building structured output: {e}")
        return json_data
626
+
627
async def json_response(self) -> Data:
    """Run the agent with structured-output instructions and return the result as Data.

    The prompt sent to the agent combines the optional ``instructions`` attribute,
    the format instructions and, when an output schema is defined, the JSON schema
    of the model built from it. The agent is run through ``call_agent``; the answer
    carried by its ``on_chain_end`` event is then parsed and validated by
    ``build_structured_output_base``.

    Returns:
        Data: One of
            * the single structured object,
            * ``{"results": [...]}`` for several objects,
            * ``{"content": ...}`` when the output is neither a list nor a dict,
            * ``{"content": ..., "error": ...}`` when the agent run or the
              structured processing fails (these errors are reported in the
              returned Data rather than raised).
    """
    logger.info("[CUGA] Starting Cuga agent run for json_response.")
    logger.info(f"[CUGA] Agent input value: {self.input_value}")

    try:
        system_components = []

        # 1. Agent instructions
        agent_instructions = getattr(self, "instructions", "") or ""
        if agent_instructions:
            system_components.append(f"{agent_instructions}")

        # 2. Format instructions
        format_instructions = getattr(self, "format_instructions", "") or ""
        if format_instructions:
            system_components.append(f"Format instructions: {format_instructions}")

        # 3. Schema information
        if getattr(self, "output_schema", None):
            try:
                processed_schema = self._preprocess_schema(self.output_schema)
                output_model = build_model_from_schema(processed_schema)
                schema_dict = output_model.model_json_schema()
                schema_info = (
                    "You are given some text that may include format instructions, "
                    "explanations, or other content alongside a JSON schema.\n\n"
                    "Your task:\n"
                    "- Extract only the JSON schema.\n"
                    "- Return it as valid JSON.\n"
                    "- Do not include format instructions, explanations, or extra text.\n\n"
                    "Input:\n"
                    f"{json.dumps(schema_dict, indent=2)}\n\n"
                    "Output (only JSON schema):"
                )
                system_components.append(schema_info)
            except (ValidationError, ValueError, TypeError, KeyError) as e:
                # A broken schema only removes the schema hint from the prompt.
                await logger.aerror(f"[CUGA] Could not build schema for prompt: {e}", exc_info=True)

        combined_instructions = "\n\n".join(system_components) if system_components else ""

        llm_model, self.chat_history, self.tools = await self.get_agent_requirements()

        input_text = self.input_value.text if hasattr(self.input_value, "text") else str(self.input_value)

        # Wrap the user input with the structured-output requirements.
        structured_input = (
            f"{combined_instructions}\n\nUser Input: {input_text}\n\nPlease provide a structured JSON response."
        )

        logger.info(f"[CUGA] Combined system prompt for structured agent: {combined_instructions}")

        # call_agent is an async generator: it must be iterated, not awaited.
        # The final answer travels in the on_chain_end event as an AgentFinish.
        content = None
        async for event in self.call_agent(
            current_input=structured_input,
            tools=self.tools or [],
            history_messages=self.chat_history,
            llm=llm_model,
        ):
            event_kind = event.get("event")
            event_data = event.get("data") or {}
            if event_kind == "on_chain_error":
                raise ValueError(str(event_data.get("error", "Unknown Cuga agent error")))
            if event_kind == "on_chain_end":
                agent_finish = event_data.get("output")
                return_values = getattr(agent_finish, "return_values", None) or {}
                content = return_values.get("output")

        if content is None:
            msg = "Cuga agent finished without producing an answer"
            raise ValueError(msg)
        if not isinstance(content, str):
            content = str(content)

        logger.info(f"[CUGA] Structured agent result: {content}")

    except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:
        await logger.aerror(f"[CUGA] Error with structured agent: {e}")
        content_str = "No content returned from Cuga agent"
        return Data(data={"content": content_str, "error": str(e)})

    # Parse the answer and validate it against the schema, if any.
    try:
        structured_output = await self.build_structured_output_base(content)

        if isinstance(structured_output, list) and structured_output:
            if len(structured_output) == 1:
                logger.info("[CUGA] Structured output is a single object in a list.")
                logger.info(f"[CUGA] Final structured output: {structured_output[0]}")
                return Data(data=structured_output[0])
            logger.info("[CUGA] Structured output is a list of multiple objects.")
            logger.info(f"[CUGA] Final structured output: {structured_output}")
            return Data(data={"results": structured_output})
        if isinstance(structured_output, dict):
            logger.info("[CUGA] Structured output is a single dictionary.")
            logger.info(f"[CUGA] Final structured output: {structured_output}")
            return Data(data=structured_output)
        logger.info("[CUGA] Structured output is not a list or dictionary. Returning raw content.")
        logger.info(f"[CUGA] Final output content: {content}")
        return Data(data={"content": content})

    except (ValueError, TypeError) as e:
        await logger.aerror(f"[CUGA] Error in structured output processing: {e}")
        return Data(data={"content": content, "error": str(e)})
734
+
735
async def get_memory_data(self):
    """Fetch the chat history for the current session.

    Messages are requested from a ``MemoryComponent`` in ascending order, limited
    to ``self.n_messages``. Any message whose ``id`` equals the ``id`` of the
    current input value is left out so the input is not duplicated.

    Returns:
        list: The retrieved messages without the current input message.
    """
    logger.info("[CUGA] Retrieving chat history messages.")
    memory = MemoryComponent(**self.get_base_args()).set(
        session_id=self.graph.session_id, order="Ascending", n_messages=self.n_messages
    )
    history = await memory.retrieve_messages()
    current_id = getattr(self.input_value, "id", None)
    return [entry for entry in history if getattr(entry, "id", None) != current_id]
753
+
754
async def get_llm(self):
    """Get the language model for the Cuga agent.

    When ``self.agent_llm`` is not a string it is treated as an already-built
    model and returned unchanged. Otherwise it is looked up as a provider name
    in ``MODEL_PROVIDERS_DICT`` and the model is built from that provider's
    component class, input definitions and attribute prefix.

    Returns:
        tuple: ``(llm_model, display_name)``. ``display_name`` is ``None``
        when ``self.agent_llm`` was already a model instance.

    Raises:
        ValueError: If the provider name is unknown, or if building the model
            raises ``AttributeError``, ``ValueError``, ``TypeError`` or
            ``RuntimeError`` (the original error is chained as the cause).
    """
    logger.info("[CUGA] Getting language model for the agent.")
    logger.info(f"[CUGA] Requested LLM provider: {self.agent_llm}")

    if not isinstance(self.agent_llm, str):
        logger.info("[CUGA] Agent LLM is already a model instance.")
        return self.agent_llm, None

    try:
        provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)
        if not provider_info:
            msg = f"Invalid model provider: {self.agent_llm}"
            raise ValueError(msg)

        component_class = provider_info.get("component_class")
        display_name = component_class.display_name
        inputs = provider_info.get("inputs")
        prefix = provider_info.get("prefix", "")

        # Build first and report success afterwards, so a failing build is
        # never preceded by a misleading "Successfully built" log line.
        llm_model = self._build_llm_model(component_class, inputs, prefix)
        logger.info(f"[CUGA] Successfully built LLM model from provider: {self.agent_llm}")
        return llm_model, display_name

    except (AttributeError, ValueError, TypeError, RuntimeError) as e:
        await logger.aerror(f"[CUGA] Error building {self.agent_llm} language model: {e!s}")
        msg = f"Failed to initialize language model: {e!s}"
        raise ValueError(msg) from e
790
+
791
def _build_llm_model(self, component, inputs, prefix=""):
    """Instantiate an LLM from a provider component.

    For each input definition, the attribute ``f"{prefix}{input.name}"`` is
    looked up on this agent; when present, its value is forwarded to the
    component under the unprefixed input name.

    Args:
        component: Provider component exposing ``set(**kwargs)``, whose
            result exposes ``build_model()``.
        inputs: Iterable of input definitions, each with a ``name`` attribute.
        prefix: Prefix prepended to each input name when reading from ``self``.

    Returns:
        Whatever ``component.set(**kwargs).build_model()`` returns.
    """
    candidates = ((field.name, f"{prefix}{field.name}") for field in inputs)
    model_kwargs = {
        name: getattr(self, attr) for name, attr in candidates if hasattr(self, attr)
    }
    return component.set(**model_kwargs).build_model()
810
+
811
def set_component_params(self, component):
    """Copy this agent's provider-specific settings onto ``component``.

    The provider entry for ``self.agent_llm`` is looked up in
    ``MODEL_PROVIDERS_DICT``. For each of its input definitions, the
    attribute ``f"{prefix}{input.name}"`` is read from this agent (when it
    exists) and passed to ``component.set`` under the unprefixed name.

    Args:
        component: The component to configure; must expose ``set(**kwargs)``
            when a provider entry is found.

    Returns:
        The result of ``component.set(...)`` when a provider entry exists,
        otherwise ``component`` unchanged.
    """
    provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)
    if not provider_info:
        return component

    inputs = provider_info.get("inputs") or ()
    # A missing/None prefix must behave like "no prefix" (as get_llm does);
    # otherwise the lookup name becomes "None<field>" and never matches.
    prefix = provider_info.get("prefix") or ""

    model_kwargs = {}
    for input_ in inputs:
        attr_name = f"{prefix}{input_.name}"
        if hasattr(self, attr_name):
            model_kwargs[input_.name] = getattr(self, attr_name)
    return component.set(**model_kwargs)
833
+
834
def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:
    """Drop the named entries from ``build_config`` in place.

    Args:
        build_config: The build configuration to prune; it is mutated.
        fields: Names to remove. Iterating a dict yields its keys, so either
            a dict keyed by field name or a list of names works. Names that
            are not present in ``build_config`` are ignored.
    """
    for name in fields:
        build_config.pop(name, None)
845
+
846
def update_input_types(self, build_config: dotdict) -> dotdict:
    """Replace missing or ``None`` ``input_types`` with an empty list.

    Dict entries get ``entry["input_types"] = []`` when the key is absent or
    ``None``. Non-dict entries that have an ``input_types`` attribute equal
    to ``None`` get that attribute set to ``[]``. Everything else is left
    untouched.

    Args:
        build_config: The build configuration to normalise; mutated in place.

    Returns:
        dotdict: The same ``build_config`` object that was passed in.
    """
    for entry in build_config.values():
        if isinstance(entry, dict):
            if entry.get("input_types") is None:
                entry["input_types"] = []
            continue
        # The non-None default makes "attribute absent" fail the ``is None`` test.
        if getattr(entry, "input_types", ()) is None:
            entry.input_types = []
    return build_config
865
+
866
async def update_build_config(
    self, build_config: dotdict, field_value: str, field_name: str | None = None
) -> dotdict:
    """Update the build configuration in response to a field change.

    Two cases are handled:

    * ``field_name == "agent_llm"``: the provider selection changed. The
      selected provider's component (if any) refreshes the config, fields of
      every other provider are removed, the selected provider's fields are
      added, and the presence of the required keys is validated. Selecting
      ``"Custom"`` removes all provider fields and replaces the ``agent_llm``
      entry with a dropdown that accepts a ``LanguageModel`` input.
    * ``field_name`` is one of ``MODEL_DYNAMIC_UPDATE_FIELDS`` while
      ``self.agent_llm`` names a known provider: the provider component is
      given this agent's parameters and asked to refresh the config.

    Args:
        build_config: The current build configuration.
        field_value: The new value for the field.
        field_name: The name of the field being changed.

    Returns:
        dotdict: Updated build configuration, with any value exposing
        ``to_dict`` converted through it.

    Raises:
        ValueError: If required keys are missing after a provider change.
    """
    if field_name == "agent_llm":
        build_config["agent_llm"]["value"] = field_value
        provider_info = MODEL_PROVIDERS_DICT.get(field_value)
        if provider_info:
            component_class = provider_info.get("component_class")
            if component_class and hasattr(component_class, "update_build_config"):
                build_config = await update_component_build_config(
                    component_class, build_config, field_value, "model_name"
                )

        if field_value in MODEL_PROVIDERS_DICT:
            # Only the selected provider matters here, so resolve its fields
            # directly instead of building an all-pairs table of providers.
            fields_to_add = MODEL_PROVIDERS_DICT[field_value]["fields"]
            fields_to_delete = [
                other_info["fields"]
                for other_provider, other_info in MODEL_PROVIDERS_DICT.items()
                if other_provider != field_value
            ]

            # Delete fields from other providers
            for fields in fields_to_delete:
                self.delete_fields(build_config, fields)

            # Add provider-specific fields
            build_config.update(fields_to_add)
            build_config["agent_llm"]["input_types"] = []
        elif field_value == "Custom":
            # Delete all provider fields
            self.delete_fields(build_config, ALL_PROVIDER_FIELDS)
            # Update with custom component
            custom_component = DropdownInput(
                name="agent_llm",
                display_name="Language Model",
                options=[*sorted(MODEL_PROVIDERS), "Custom"],
                value="Custom",
                real_time_refresh=True,
                input_types=["LanguageModel"],
                options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]
                + [{"icon": "brain"}],
            )
            build_config.update({"agent_llm": custom_component.to_dict()})

        # Update input types for all fields
        build_config = self.update_input_types(build_config)

        # Validate required keys
        default_keys = [
            "code",
            "_type",
            "agent_llm",
            "tools",
            "input_value",
            "add_current_date_tool",
            "policies",
            "agent_description",
            "max_iterations",
            "handle_parsing_errors",
            "verbose",
        ]
        missing_keys = [key for key in default_keys if key not in build_config]
        if missing_keys:
            msg = f"Missing required keys in build_config: {missing_keys}"
            raise ValueError(msg)

    if (
        isinstance(self.agent_llm, str)
        and self.agent_llm in MODEL_PROVIDERS_DICT
        and field_name in MODEL_DYNAMIC_UPDATE_FIELDS
    ):
        provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)
        if provider_info:
            component_class = provider_info.get("component_class")
            component_class = self.set_component_params(component_class)
            # NOTE(review): the refresh always targets "model_name" regardless
            # of which dynamic field changed — confirm this is intended.
            if component_class and hasattr(component_class, "update_build_config"):
                build_config = await update_component_build_config(
                    component_class, build_config, field_value, "model_name"
                )
    return dotdict({k: v.to_dict() if hasattr(v, "to_dict") else v for k, v in build_config.items()})
974
+
975
async def _get_tools(self) -> list[Tool]:
    """Build the tools that expose this agent.

    A component toolkit is created for this component and asked for tools
    named ``Call_CugaAgent``, described by the agent's tool description
    followed by the tool names string. When this component has a
    ``tools_metadata`` attribute, a second toolkit carrying that metadata
    updates the tools before they are returned.

    Returns:
        list[Tool]: The tools produced by the component toolkit.
    """
    logger.info("[CUGA] Building agent tools.")
    toolkit_factory = _get_component_toolkit()
    names_suffix = self._build_tools_names()
    description = f"{self.get_tool_description()}{names_suffix}"

    base_toolkit = toolkit_factory(component=self)
    tools = base_toolkit.get_tools(
        tool_name="Call_CugaAgent",
        tool_description=description,
        callbacks=self.get_langchain_callbacks(),
    )

    if hasattr(self, "tools_metadata"):
        metadata_toolkit = toolkit_factory(component=self, metadata=self.tools_metadata)
        tools = metadata_toolkit.update_tools_metadata(tools=tools)

    built_names = [tool.name for tool in tools]
    logger.info(f"[CUGA] Tools built: {built_names}")
    return tools