praisonaiagents 0.0.56__py3-none-any.whl → 0.0.58__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- praisonaiagents/llm/llm.py +23 -6
- praisonaiagents/process/process.py +352 -112
- praisonaiagents/tools/__init__.py +22 -1
- praisonaiagents/tools/csv_tools.py +54 -23
- praisonaiagents/tools/train/data/generatecot.py +500 -0
- {praisonaiagents-0.0.56.dist-info → praisonaiagents-0.0.58.dist-info}/METADATA +1 -1
- {praisonaiagents-0.0.56.dist-info → praisonaiagents-0.0.58.dist-info}/RECORD +9 -8
- {praisonaiagents-0.0.56.dist-info → praisonaiagents-0.0.58.dist-info}/WHEEL +0 -0
- {praisonaiagents-0.0.56.dist-info → praisonaiagents-0.0.58.dist-info}/top_level.txt +0 -0
praisonaiagents/llm/llm.py
CHANGED
@@ -113,11 +113,28 @@ class LLM:
             litellm.success_callback = []
             litellm._async_success_callback = []
             litellm.callbacks = []
-
-
-
-
-
+
+            verbose = extra_settings.get('verbose', True)
+
+            # Only suppress logs if not in debug mode
+            if not isinstance(verbose, bool) and verbose >= 10:
+                # Enable detailed debug logging
+                logging.getLogger("asyncio").setLevel(logging.DEBUG)
+                logging.getLogger("selector_events").setLevel(logging.DEBUG)
+                logging.getLogger("litellm.utils").setLevel(logging.DEBUG)
+                logging.getLogger("litellm.main").setLevel(logging.DEBUG)
+                litellm.suppress_debug_messages = False
+                litellm.set_verbose = True
+            else:
+                # Suppress debug logging for normal operation
+                logging.getLogger("asyncio").setLevel(logging.WARNING)
+                logging.getLogger("selector_events").setLevel(logging.WARNING)
+                logging.getLogger("litellm.utils").setLevel(logging.WARNING)
+                logging.getLogger("litellm.main").setLevel(logging.WARNING)
+                litellm.suppress_debug_messages = True
+                litellm._logging._disable_debugging()
+                warnings.filterwarnings("ignore", category=RuntimeWarning)
+
         except ImportError:
             raise ImportError(
                 "LiteLLM is required but not installed. "
@@ -145,7 +162,7 @@ class LLM:
         self.extra_settings = extra_settings
         self.console = Console()
         self.chat_history = []
-        self.verbose =
+        self.verbose = verbose
         self.markdown = extra_settings.get('markdown', True)
         self.self_reflect = extra_settings.get('self_reflect', False)
         self.max_reflect = extra_settings.get('max_reflect', 3)
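The new initialization code keys the logging behaviour off a `verbose` value read from `extra_settings`. A minimal sketch of how a caller might exercise both paths, assuming `LLM(...)` forwards extra keyword arguments into `extra_settings` as the hunk above suggests:

```python
from praisonaiagents.llm.llm import LLM

# Default: verbose stays a bool, so the quieter WARNING-level branch applies.
quiet_llm = LLM(model="gpt-4o-mini")

# A numeric verbose value of 10 or more takes the DEBUG branch, enabling detailed
# logs for asyncio, selector_events and the litellm loggers.
debug_llm = LLM(model="gpt-4o-mini", verbose=10)
```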
praisonaiagents/process/process.py
CHANGED
@@ -13,6 +13,13 @@ class LoopItems(BaseModel):
 
 class Process:
     def __init__(self, tasks: Dict[str, Task], agents: List[Agent], manager_llm: Optional[str] = None, verbose: bool = False, max_iter: int = 10):
+        logging.debug(f"=== Initializing Process ===")
+        logging.debug(f"Number of tasks: {len(tasks)}")
+        logging.debug(f"Number of agents: {len(agents)}")
+        logging.debug(f"Manager LLM: {manager_llm}")
+        logging.debug(f"Verbose mode: {verbose}")
+        logging.debug(f"Max iterations: {max_iter}")
+
         self.tasks = tasks
         self.agents = agents
         self.manager_llm = manager_llm
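For reference, a minimal sketch of constructing a `Process` with the signature shown above. The `Agent` and `Task` keyword arguments are taken from elsewhere in this diff; the top-level imports and the `input_file` argument are assumptions about the package layout rather than confirmed API:

```python
from praisonaiagents import Agent, Task            # assumed top-level exports
from praisonaiagents.process.process import Process

agent = Agent(name="Worker", role="QA solver", goal="Answer questions",
              backstory="Handles looped QA tasks", llm="gpt-4o-mini")
task = Task(description="Answer each question", agent=agent, name="qa_loop",
            task_type="loop", is_start=True, input_file="tasks.csv")  # input_file assumed
process = Process(tasks={task.id: task}, agents=[agent],
                  manager_llm="gpt-4o-mini", verbose=True, max_iter=10)
```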
@@ -21,25 +28,30 @@ class Process:
 
     async def aworkflow(self) -> AsyncGenerator[str, None]:
         """Async version of workflow method"""
+        logging.debug("=== Starting Async Workflow ===")
         current_iter = 0  # Track how many times we've looped
         # Build workflow relationships first
+        logging.debug("Building workflow relationships...")
         for task in self.tasks.values():
             if task.next_tasks:
                 for next_task_name in task.next_tasks:
                     next_task = next((t for t in self.tasks.values() if t.name == next_task_name), None)
                     if next_task:
                         next_task.previous_tasks.append(task.name)
+                        logging.debug(f"Added {task.name} as previous task for {next_task_name}")
 
         # Find start task
+        logging.debug("Finding start task...")
         start_task = None
         for task_id, task in self.tasks.items():
             if task.is_start:
                 start_task = task
+                logging.debug(f"Found marked start task: {task.name} (id: {task_id})")
                 break
 
         if not start_task:
             start_task = list(self.tasks.values())[0]
-            logging.
+            logging.debug(f"No start task marked, using first task: {start_task.name}")
 
         current_task = start_task
         visited_tasks = set()
@@ -54,7 +66,16 @@ class Process:
                 break
 
             task_id = current_task.id
-            logging.
+            logging.debug(f"""
+=== Task Execution Details ===
+Current task: {current_task.name}
+Type: {current_task.task_type}
+Status: {current_task.status}
+Previous tasks: {current_task.previous_tasks}
+Next tasks: {current_task.next_tasks}
+Context tasks: {[t.name for t in current_task.context] if current_task.context else []}
+Description length: {len(current_task.description)}
+""")
 
             # Add context from previous tasks to description
             if current_task.previous_tasks or current_task.context:
@@ -66,46 +87,6 @@ class Process:
                     if prev_task and prev_task.result:
                         # Handle loop data
                         if current_task.task_type == "loop":
-                            # # create a loop manager Agent
-                            # loop_manager = Agent(
-                            #     name="Loop Manager",
-                            #     role="Loop data processor",
-                            #     goal="Process loop data and convert it to list format",
-                            #     backstory="Expert at handling loop data and converting it to proper format",
-                            #     llm=self.manager_llm,
-                            #     verbose=self.verbose,
-                            #     markdown=True
-                            # )
-
-                            # # get the loop data convert it to list using calling Agent class chat
-                            # loop_prompt = f"""
-                            # Process this data into a list format:
-                            # {prev_task.result.raw}
-
-                            # Return a JSON object with an 'items' array containing the items to process.
-                            # """
-                            # if current_task.async_execution:
-                            #     loop_data_str = await loop_manager.achat(
-                            #         prompt=loop_prompt,
-                            #         output_json=LoopItems
-                            #     )
-                            # else:
-                            #     loop_data_str = loop_manager.chat(
-                            #         prompt=loop_prompt,
-                            #         output_json=LoopItems
-                            #     )
-
-                            # try:
-                            #     # The response will already be parsed into LoopItems model
-                            #     loop_data[f"loop_{current_task.name}"] = {
-                            #         "items": loop_data_str.items,
-                            #         "index": 0,
-                            #         "remaining": len(loop_data_str.items)
-                            #     }
-                            #     context += f"\nCurrent loop item: {loop_data_str.items[0]}"
-                            # except Exception as e:
-                            #     display_error(f"Failed to process loop data: {e}")
-                            #     context += f"\n{prev_name}: {prev_task.result.raw}"
                             context += f"\n{prev_name}: {prev_task.result.raw}"
                         else:
                             context += f"\n{prev_name}: {prev_task.result.raw}"
@@ -119,14 +100,103 @@ class Process:
                 # Update task description with context
                 current_task.description = current_task.description + context
 
-                #
-
-
+            # Skip execution for loop tasks, only process their subtasks
+            if current_task.task_type == "loop":
+                logging.debug(f"""
+=== Loop Task Details ===
+Name: {current_task.name}
+ID: {current_task.id}
+Status: {current_task.status}
+Next tasks: {current_task.next_tasks}
+Condition: {current_task.condition}
+Subtasks created: {getattr(current_task, '_subtasks_created', False)}
+Input file: {getattr(current_task, 'input_file', None)}
+""")
+
+                # Check if subtasks are created and completed
+                if getattr(current_task, "_subtasks_created", False):
+                    subtasks = [
+                        t for t in self.tasks.values()
+                        if t.name.startswith(current_task.name + "_")
+                    ]
+                    logging.debug(f"""
+=== Subtask Status Check ===
+Total subtasks: {len(subtasks)}
+Completed: {sum(1 for st in subtasks if st.status == "completed")}
+Pending: {sum(1 for st in subtasks if st.status != "completed")}
+""")
+
+                    # Log detailed subtask info
+                    for st in subtasks:
+                        logging.debug(f"""
+Subtask: {st.name}
+- Status: {st.status}
+- Next tasks: {st.next_tasks}
+- Condition: {st.condition}
+""")
+
+                    if subtasks and all(st.status == "completed" for st in subtasks):
+                        logging.debug(f"=== All {len(subtasks)} subtasks completed for {current_task.name} ===")
+
+                        # Mark loop task completed and move to next task
+                        current_task.status = "completed"
+                        logging.debug(f"Loop {current_task.name} marked as completed")
+
+                        # Move to next task if available
+                        if current_task.next_tasks:
+                            next_task_name = current_task.next_tasks[0]
+                            logging.debug(f"Attempting transition to next task: {next_task_name}")
+                            next_task = next((t for t in self.tasks.values() if t.name == next_task_name), None)
+                            if next_task:
+                                logging.debug(f"=== Transitioning: {current_task.name} -> {next_task.name} ===")
+                                logging.debug(f"Next task status: {next_task.status}")
+                                logging.debug(f"Next task condition: {next_task.condition}")
+                                current_task = next_task
+                        else:
+                            logging.debug(f"=== No next tasks for {current_task.name}, ending loop ===")
+                            current_task = None
+                else:
+                    logging.debug(f"No subtasks created yet for {current_task.name}")
+                    # Create subtasks if needed
+                    if current_task.input_file:
+                        self._create_loop_subtasks(current_task)
+                        current_task._subtasks_created = True
+                        logging.debug(f"Created subtasks from {current_task.input_file}")
+                    else:
+                        # No input file, mark as done
+                        current_task.status = "completed"
+                        logging.debug(f"No input file, marking {current_task.name} as completed")
+                        if current_task.next_tasks:
+                            next_task_name = current_task.next_tasks[0]
+                            next_task = next((t for t in self.tasks.values() if t.name == next_task_name), None)
+                            current_task = next_task
+                        else:
+                            current_task = None
+            else:
+                # Execute non-loop task
+                logging.debug(f"=== Executing non-loop task: {current_task.name} (id: {task_id}) ===")
+                logging.debug(f"Task status: {current_task.status}")
+                logging.debug(f"Task next_tasks: {current_task.next_tasks}")
+                yield task_id
+                visited_tasks.add(task_id)
 
             # Reset completed task to "not started" so it can run again
             if self.tasks[task_id].status == "completed":
-
-                self.tasks[task_id].
+                # Never reset loop tasks, decision tasks, or their subtasks
+                subtask_name = self.tasks[task_id].name
+                logging.debug(f"=== Checking reset for completed task: {subtask_name} ===")
+                logging.debug(f"Task type: {self.tasks[task_id].task_type}")
+                logging.debug(f"Task status before reset check: {self.tasks[task_id].status}")
+
+                if (self.tasks[task_id].task_type not in ["loop", "decision"] and
+                    not any(t.task_type == "loop" and subtask_name.startswith(t.name + "_")
+                            for t in self.tasks.values())):
+                    logging.debug(f"=== Resetting non-loop, non-decision task {subtask_name} to 'not started' ===")
+                    self.tasks[task_id].status = "not started"
+                    logging.debug(f"Task status after reset: {self.tasks[task_id].status}")
+                else:
+                    logging.debug(f"=== Skipping reset for loop/decision/subtask: {subtask_name} ===")
+                    logging.debug(f"Keeping status as: {self.tasks[task_id].status}")
 
             # Handle loop progression
             if current_task.task_type == "loop":
@@ -179,6 +249,15 @@ class Process:
                 logging.info("Workflow execution completed")
                 break
 
+            # Add completion logging
+            logging.debug(f"""
+=== Task Completion ===
+Task: {current_task.name}
+Final status: {current_task.status}
+Next task: {next_task.name if next_task else None}
+Iteration: {current_iter}/{self.max_iter}
+""")
+
     async def asequential(self) -> AsyncGenerator[str, None]:
         """Async version of sequential method"""
         for task_id in self.tasks:
@@ -343,33 +422,59 @@ Provide a JSON with the structure:
             new_tasks = []
 
             if file_ext == ".csv":
-                # existing CSV reading logic
                 with open(start_task.input_file, "r", encoding="utf-8") as f:
-
-                    reader = csv.reader(f)
+                    reader = csv.reader(f, quotechar='"', escapechar='\\')  # Handle quoted/escaped fields
                     previous_task = None
+                    task_count = 0
+
                     for i, row in enumerate(reader):
-                        if row:  # Skip empty rows
-
-                            row_task = Task(
-                                description=f"{start_task.description}\n{task_desc}" if start_task.description else task_desc,
-                                agent=start_task.agent,
-                                name=f"{start_task.name}_{i+1}" if start_task.name else task_desc,
-                                expected_output=getattr(start_task, 'expected_output', None),
-                                is_start=(i == 0),
-                                task_type="task",
-                                condition={
-                                    "complete": ["next"],
-                                    "retry": ["current"]
-                                }
-                            )
-                            self.tasks[row_task.id] = row_task
-                            new_tasks.append(row_task)
+                        if not row:  # Skip truly empty rows
+                            continue
 
-
-
-
-
+                        # Properly handle Q&A pairs with potential commas
+                        task_desc = row[0].strip() if row else ""
+                        if len(row) > 1:
+                            # Preserve all fields in case of multiple commas
+                            question = row[0].strip()
+                            answer = ",".join(field.strip() for field in row[1:])
+                            task_desc = f"Question: {question}\nAnswer: {answer}"
+
+                        if not task_desc:  # Skip rows with empty content
+                            continue
+
+                        task_count += 1
+                        logging.debug(f"Processing CSV row {i+1}: {task_desc}")
+
+                        # Inherit next_tasks from parent loop task
+                        inherited_next_tasks = start_task.next_tasks if start_task.next_tasks else []
+
+                        row_task = Task(
+                            description=f"{start_task.description}\n{task_desc}" if start_task.description else task_desc,
+                            agent=start_task.agent,
+                            name=f"{start_task.name}_{task_count}" if start_task.name else task_desc,
+                            expected_output=getattr(start_task, 'expected_output', None),
+                            is_start=(task_count == 1),
+                            task_type="decision",  # Change to decision type
+                            next_tasks=inherited_next_tasks,  # Inherit parent's next tasks
+                            condition={
+                                "done": inherited_next_tasks if inherited_next_tasks else ["next"],  # Use full inherited_next_tasks
+                                "retry": ["current"],
+                                "exit": []  # Empty list for exit condition
+                            }
+                        )
+                        self.tasks[row_task.id] = row_task
+                        new_tasks.append(row_task)
+
+                        if previous_task:
+                            previous_task.next_tasks = [row_task.name]
+                            previous_task.condition["done"] = [row_task.name]  # Use "done" consistently
+                        previous_task = row_task
+
+                        # For the last task in the loop, ensure it points to parent's next tasks
+                        if task_count > 0 and not row_task.next_tasks:
+                            row_task.next_tasks = inherited_next_tasks
+
+                    logging.info(f"Processed {task_count} rows from CSV file")
             else:
                 # If not CSV, read lines
                 with open(start_task.input_file, "r", encoding="utf-8") as f:
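As a hedged illustration of the input this reworked CSV handling expects (question in the first column, answer in the second), a small standard-library script can produce a suitable `tasks.csv`; `csv.writer` quotes fields that contain commas, which is exactly what the new `quotechar`/`escapechar` handling above accounts for:

```python
import csv

rows = [
    ("What is 2+2?", "4"),
    ("Name three primes", "2, 3, 5"),  # embedded commas are quoted automatically
]
with open("tasks.csv", "w", newline="", encoding="utf-8") as f:
    csv.writer(f).writerows(rows)
```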
@@ -402,7 +507,7 @@ Provide a JSON with the structure:
         except Exception as e:
             logging.error(f"Failed to read file tasks: {e}")
 
-        # end of
+        # end of start task handling
         current_task = start_task
         visited_tasks = set()
         loop_data = {}  # Store loop-specific data
@@ -413,8 +518,88 @@ Provide a JSON with the structure:
                 logging.info(f"Max iteration limit {self.max_iter} reached, ending workflow.")
                 break
 
+            # Handle loop task file reading at runtime
+            if (current_task.task_type == "loop" and
+                current_task is not start_task and
+                getattr(current_task, "_subtasks_created", False) is not True):
+
+                if not current_task.input_file:
+                    current_task.input_file = "tasks.csv"
+
+                if getattr(current_task, "input_file", None):
+                    try:
+                        file_ext = os.path.splitext(current_task.input_file)[1].lower()
+                        new_tasks = []
+
+                        if file_ext == ".csv":
+                            with open(current_task.input_file, "r", encoding="utf-8") as f:
+                                reader = csv.reader(f)
+                                previous_task = None
+                                for i, row in enumerate(reader):
+                                    if row:  # Skip empty rows
+                                        task_desc = row[0]  # Take first column
+                                        row_task = Task(
+                                            description=f"{current_task.description}\n{task_desc}" if current_task.description else task_desc,
+                                            agent=current_task.agent,
+                                            name=f"{current_task.name}_{i+1}" if current_task.name else task_desc,
+                                            expected_output=getattr(current_task, 'expected_output', None),
+                                            is_start=(i == 0),
+                                            task_type="task",
+                                            condition={
+                                                "complete": ["next"],
+                                                "retry": ["current"]
+                                            }
+                                        )
+                                        self.tasks[row_task.id] = row_task
+                                        new_tasks.append(row_task)
+
+                                        if previous_task:
+                                            previous_task.next_tasks = [row_task.name]
+                                            previous_task.condition["complete"] = [row_task.name]
+                                        previous_task = row_task
+                        else:
+                            with open(current_task.input_file, "r", encoding="utf-8") as f:
+                                lines = f.read().splitlines()
+                                previous_task = None
+                                for i, line in enumerate(lines):
+                                    row_task = Task(
+                                        description=f"{current_task.description}\n{line.strip()}" if current_task.description else line.strip(),
+                                        agent=current_task.agent,
+                                        name=f"{current_task.name}_{i+1}" if current_task.name else line.strip(),
+                                        expected_output=getattr(current_task, 'expected_output', None),
+                                        is_start=(i == 0),
+                                        task_type="task",
+                                        condition={
+                                            "complete": ["next"],
+                                            "retry": ["current"]
+                                        }
+                                    )
+                                    self.tasks[row_task.id] = row_task
+                                    new_tasks.append(row_task)
+
+                                    if previous_task:
+                                        previous_task.next_tasks = [row_task.name]
+                                        previous_task.condition["complete"] = [row_task.name]
+                                    previous_task = row_task
+
+                        if new_tasks:
+                            current_task.next_tasks = [new_tasks[0].name]
+                            current_task._subtasks_created = True
+                            logging.info(f"Created {len(new_tasks)} tasks from: {current_task.input_file} for loop task {current_task.name}")
+                    except Exception as e:
+                        logging.error(f"Failed to read file tasks for loop task {current_task.name}: {e}")
+
             task_id = current_task.id
-            logging.
+            logging.debug(f"""
+=== Task Execution Details ===
+Current task: {current_task.name}
+Type: {current_task.task_type}
+Status: {current_task.status}
+Previous tasks: {current_task.previous_tasks}
+Next tasks: {current_task.next_tasks}
+Context tasks: {[t.name for t in current_task.context] if current_task.context else []}
+Description length: {len(current_task.description)}
+""")
 
             # Add context from previous tasks to description
             if current_task.previous_tasks or current_task.context:
@@ -426,40 +611,6 @@ Provide a JSON with the structure:
                     if prev_task and prev_task.result:
                         # Handle loop data
                         if current_task.task_type == "loop":
-                            # # create a loop manager Agent
-                            # loop_manager = Agent(
-                            #     name="Loop Manager",
-                            #     role="Loop data processor",
-                            #     goal="Process loop data and convert it to list format",
-                            #     backstory="Expert at handling loop data and converting it to proper format",
-                            #     llm=self.manager_llm,
-                            #     verbose=self.verbose,
-                            #     markdown=True
-                            # )
-
-                            # # get the loop data convert it to list using calling Agent class chat
-                            # loop_prompt = f"""
-                            # Process this data into a list format:
-                            # {prev_task.result.raw}
-
-                            # Return a JSON object with an 'items' array containing the items to process.
-                            # """
-                            # loop_data_str = loop_manager.chat(
-                            #     prompt=loop_prompt,
-                            #     output_json=LoopItems
-                            # )
-
-                            # try:
-                            #     # The response will already be parsed into LoopItems model
-                            #     loop_data[f"loop_{current_task.name}"] = {
-                            #         "items": loop_data_str.items,
-                            #         "index": 0,
-                            #         "remaining": len(loop_data_str.items)
-                            #     }
-                            #     context += f"\nCurrent loop item: {loop_data_str.items[0]}"
-                            # except Exception as e:
-                            #     display_error(f"Failed to process loop data: {e}")
-                            #     context += f"\n{prev_name}: {prev_task.result.raw}"
                             context += f"\n{prev_name}: {prev_task.result.raw}"
                         else:
                             context += f"\n{prev_name}: {prev_task.result.raw}"
@@ -473,14 +624,103 @@ Provide a JSON with the structure:
                 # Update task description with context
                 current_task.description = current_task.description + context
 
-                #
-
-
+            # Skip execution for loop tasks, only process their subtasks
+            if current_task.task_type == "loop":
+                logging.debug(f"""
+=== Loop Task Details ===
+Name: {current_task.name}
+ID: {current_task.id}
+Status: {current_task.status}
+Next tasks: {current_task.next_tasks}
+Condition: {current_task.condition}
+Subtasks created: {getattr(current_task, '_subtasks_created', False)}
+Input file: {getattr(current_task, 'input_file', None)}
+""")
+
+                # Check if subtasks are created and completed
+                if getattr(current_task, "_subtasks_created", False):
+                    subtasks = [
+                        t for t in self.tasks.values()
+                        if t.name.startswith(current_task.name + "_")
+                    ]
+
+                    logging.debug(f"""
+=== Subtask Status Check ===
+Total subtasks: {len(subtasks)}
+Completed: {sum(1 for st in subtasks if st.status == "completed")}
+Pending: {sum(1 for st in subtasks if st.status != "completed")}
+""")
+
+                    for st in subtasks:
+                        logging.debug(f"""
+Subtask: {st.name}
+- Status: {st.status}
+- Next tasks: {st.next_tasks}
+- Condition: {st.condition}
+""")
+
+                    if subtasks and all(st.status == "completed" for st in subtasks):
+                        logging.debug(f"=== All {len(subtasks)} subtasks completed for {current_task.name} ===")
+
+                        # Mark loop task completed and move to next task
+                        current_task.status = "completed"
+                        logging.debug(f"Loop {current_task.name} marked as completed")
+
+                        # Move to next task if available
+                        if current_task.next_tasks:
+                            next_task_name = current_task.next_tasks[0]
+                            logging.debug(f"Attempting transition to next task: {next_task_name}")
+                            next_task = next((t for t in self.tasks.values() if t.name == next_task_name), None)
+                            if next_task:
+                                logging.debug(f"=== Transitioning: {current_task.name} -> {next_task.name} ===")
+                                logging.debug(f"Next task status: {next_task.status}")
+                                logging.debug(f"Next task condition: {next_task.condition}")
+                                current_task = next_task
+                        else:
+                            logging.debug(f"=== No next tasks for {current_task.name}, ending loop ===")
+                            current_task = None
+                else:
+                    logging.debug(f"No subtasks created yet for {current_task.name}")
+                    # Create subtasks if needed
+                    if current_task.input_file:
+                        self._create_loop_subtasks(current_task)
+                        current_task._subtasks_created = True
+                        logging.debug(f"Created subtasks from {current_task.input_file}")
+                    else:
+                        # No input file, mark as done
+                        current_task.status = "completed"
+                        logging.debug(f"No input file, marking {current_task.name} as completed")
+                        if current_task.next_tasks:
+                            next_task_name = current_task.next_tasks[0]
+                            next_task = next((t for t in self.tasks.values() if t.name == next_task_name), None)
+                            current_task = next_task
+                        else:
+                            current_task = None
+            else:
+                # Execute non-loop task
+                logging.debug(f"=== Executing non-loop task: {current_task.name} (id: {task_id}) ===")
+                logging.debug(f"Task status: {current_task.status}")
+                logging.debug(f"Task next_tasks: {current_task.next_tasks}")
+                yield task_id
+                visited_tasks.add(task_id)
 
-            # Reset completed task to "not started" so it can run again
+            # Reset completed task to "not started" so it can run again
             if self.tasks[task_id].status == "completed":
-
-                self.tasks[task_id].
+                # Never reset loop tasks, decision tasks, or their subtasks
+                subtask_name = self.tasks[task_id].name
+                logging.debug(f"=== Checking reset for completed task: {subtask_name} ===")
+                logging.debug(f"Task type: {self.tasks[task_id].task_type}")
+                logging.debug(f"Task status before reset check: {self.tasks[task_id].status}")
+
+                if (self.tasks[task_id].task_type not in ["loop", "decision"] and
+                    not any(t.task_type == "loop" and subtask_name.startswith(t.name + "_")
+                            for t in self.tasks.values())):
+                    logging.debug(f"=== Resetting non-loop, non-decision task {subtask_name} to 'not started' ===")
+                    self.tasks[task_id].status = "not started"
+                    logging.debug(f"Task status after reset: {self.tasks[task_id].status}")
+                else:
+                    logging.debug(f"=== Skipping reset for loop/decision/subtask: {subtask_name} ===")
+                    logging.debug(f"Keeping status as: {self.tasks[task_id].status}")
 
             # Handle loop progression
             if current_task.task_type == "loop":
praisonaiagents/tools/__init__.py
CHANGED
@@ -135,6 +135,27 @@ TOOL_MAPPINGS = {
     'group_by': ('.pandas_tools', 'PandasTools'),
     'pivot_table': ('.pandas_tools', 'PandasTools'),
     'pandas_tools': ('.pandas_tools', 'PandasTools'),
+
+    # Chain of Thought Training Tools
+    'cot_run': ('.train.data.generatecot', 'GenerateCOT'),  # Orchestrates text solution
+    'cot_run_dict': ('.train.data.generatecot', 'GenerateCOT'),  # Orchestrates dict-based solution
+    'cot_generate': ('.train.data.generatecot', 'GenerateCOT'),  # Generate text solution
+    'cot_generate_dict': ('.train.data.generatecot', 'GenerateCOT'),  # Generate structured solution
+    'cot_improve': ('.train.data.generatecot', 'GenerateCOT'),  # Improve text solution
+    'cot_improve_dict': ('.train.data.generatecot', 'GenerateCOT'),  # Improve dict-based solution
+    'cot_check': ('.train.data.generatecot', 'GenerateCOT'),  # Check correctness
+    'cot_find_error': ('.train.data.generatecot', 'GenerateCOT'),  # Locate error in solution
+    'cot_load_answers': ('.train.data.generatecot', 'GenerateCOT'),  # Load QA pairs
+
+    # COT Save/Export with QA Pairs
+    'cot_save_solutions_with_qa_pairs': ('.train.data.generatecot', 'GenerateCOT'),  # Save with QA pairs
+    'cot_append_solutions_with_qa_pairs': ('.train.data.generatecot', 'GenerateCOT'),  # Append with QA pairs
+    'cot_export_json_with_qa_pairs': ('.train.data.generatecot', 'GenerateCOT'),  # Export JSON with QA pairs
+    'cot_export_csv_with_qa_pairs': ('.train.data.generatecot', 'GenerateCOT'),  # Export CSV with QA pairs
+    'cot_append_csv_with_qa_pairs': ('.train.data.generatecot', 'GenerateCOT'),  # Append CSV with QA pairs
+    'cot_save': ('.train.data.generatecot', 'GenerateCOT'),  # Save single QA to file
+    'cot_upload_to_huggingface': ('.train.data.generatecot', 'GenerateCOT'),  # Upload dataset to HuggingFace
+    'cot_tools': ('.train.data.generatecot', 'GenerateCOT'),  # Full toolkit access
 }
 
 _instances = {}  # Cache for class instances
@@ -161,7 +182,7 @@ def __getattr__(name: str) -> Any:
     ]:
         return getattr(module, name)
     if name in ['file_tools', 'pandas_tools', 'wikipedia_tools',
-                'newspaper_tools', 'arxiv_tools', 'spider_tools', 'duckdb_tools', 'csv_tools', 'json_tools', 'excel_tools', 'xml_tools', 'yaml_tools', 'calculator_tools', 'python_tools', 'shell_tools']:
+                'newspaper_tools', 'arxiv_tools', 'spider_tools', 'duckdb_tools', 'csv_tools', 'json_tools', 'excel_tools', 'xml_tools', 'yaml_tools', 'calculator_tools', 'python_tools', 'shell_tools', 'cot_tools']:
         return module  # Returns the callable module
     return getattr(module, name)
 else:
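With these mappings in place, the cot_* helpers resolve through the same lazy `__getattr__` mechanism as the other tools. A hedged sketch, assuming the loader returns callables bound to a cached GenerateCOT instance as it does for the other class-based tools:

```python
from praisonaiagents.tools import cot_generate, cot_save

solution = cot_generate("What is 2+2?")                 # free-form chain-of-thought text
cot_save("What is 2+2?", "4", filepath="dataset.csv")   # registers the QA pair and appends a row
```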
praisonaiagents/tools/csv_tools.py
CHANGED
@@ -89,7 +89,7 @@ class CSVTools:
     def write_csv(
         self,
         filepath: str,
-        data: List[Dict[str, Any]],
+        data: Union[List[Dict[str, Any]], str],
         encoding: str = 'utf-8',
         delimiter: str = ',',
         index: bool = False,
@@ -102,35 +102,66 @@ class CSVTools:
 
         Args:
             filepath: Path to CSV file
-            data:
-
-
-
-
-
-
-
-
+            data: Either a list of dictionaries or a string containing CSV data
+                  If string, each line should be comma-separated values
+            encoding: File encoding (default: 'utf-8')
+            delimiter: Column delimiter (default: ',')
+            index: Whether to write row indices (default: False)
+            header: Whether to write column headers (default: True)
+            float_format: Format string for float values (default: None)
+            date_format: Format string for date values (default: None)
+            mode: Write mode - 'w' for write, 'a' for append (default: 'w')
+
         Returns:
-            bool:
+            bool: True if successful, False otherwise
         """
         try:
             pd = self._get_pandas()
             if pd is None:
                 return False
 
-
-
-
-
-
-
-
-
-
-
-
-
+            # Handle string input
+            if isinstance(data, str):
+                # Convert string to list of dicts
+                rows = []
+                if delimiter in data:
+                    # Get existing columns if file exists and in append mode
+                    existing_cols = []
+                    if mode == 'a' and Path(filepath).exists():
+                        try:
+                            existing_df = pd.read_csv(filepath, nrows=1)
+                            existing_cols = existing_df.columns.tolist()
+                        except:
+                            pass
+
+                    values = [v.strip() for v in data.split(delimiter)]
+
+                    if existing_cols:
+                        # Use existing column names
+                        row_dict = dict(zip(existing_cols, values))
+                    else:
+                        # Create generic column names
+                        row_dict = {f'col{i}': val for i, val in enumerate(values)}
+
+                    rows.append(row_dict)
+                data = rows
+
+            df = pd.DataFrame(data)
+
+            # Handle append mode properly
+            write_header = header if mode == 'w' else (header and not Path(filepath).exists())
+
+            df.to_csv(
+                filepath,
+                encoding=encoding,
+                sep=delimiter,
+                index=index,
+                header=write_header,
+                float_format=float_format,
+                date_format=date_format,
+                mode=mode
+            )
+            return True
 
         except Exception as e:
             error_msg = f"Error writing CSV file {filepath}: {str(e)}"
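A brief sketch of the widened signature in use. This assumes `CSVTools` can be instantiated directly from `praisonaiagents.tools.csv_tools` and that pandas is installed:

```python
from praisonaiagents.tools.csv_tools import CSVTools

tools = CSVTools()
# Existing behaviour: a list of dictionaries becomes the rows of the file.
tools.write_csv("scores.csv", [{"name": "Ada", "score": 95}])
# New behaviour: a delimited string is written as a single row; in append mode
# the column names are reused from the existing file's header when present.
tools.write_csv("scores.csv", "Grace, 98", mode="a")
```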
praisonaiagents/tools/train/data/generatecot.py
ADDED
@@ -0,0 +1,500 @@
+from typing import Dict, Optional, Union, Any
+import json
+from datetime import datetime
+from openai import OpenAI
+from pydantic import BaseModel
+
+# Lazy loader for LLM
+def get_llm():
+    try:
+        from praisonaiagents.llm.llm import LLM
+        return LLM
+    except ImportError:
+        raise ImportError(
+            "LLM is required for this toolbut not installed. "
+            "Please install with: pip install 'praisonaiagents[llm]' datasets huggingface-hub pandas"
+        )
+
+class GenerateCOT:
+    def __init__(
+        self,
+        qa_pairs: Optional[Dict[str, str]] = None,
+        model: str = "gpt-4o-mini",
+        api_key: Optional[str] = None,
+        max_attempts: int = 3
+    ):
+        self.qa_pairs = qa_pairs or {}
+        self.max_attempts = max_attempts
+        self.solutions = {}
+        self.llm = get_llm()(model=model)  # Get LLM class and instantiate
+        self.model = model
+
+    def _ask_ai(self, prompt: str) -> str:
+        return self.llm.get_response(prompt, temperature=0.7)
+
+    def _build_solution_prompt(self, question: str, context: str) -> str:
+        return f"""
+        Solve this problem step by step: {question}
+        Context: {context}
+        Steps needed:
+        1. Break down the problem
+        2. Show your work
+        3. Explain each step
+        4. Give final answer
+        """
+
+    def cot_generate(self, question: str, context: str = "") -> str:
+        prompt = self._build_solution_prompt(question, context)
+        return self._ask_ai(prompt)
+
+    def cot_check(self, question: str, answer: str) -> bool:
+        if question not in self.qa_pairs:
+            raise ValueError(f"No correct answer found for: {question}")
+
+        prompt = f"""
+        Question: {question}
+        Given Answer: {answer}
+        Correct Answer: {self.qa_pairs[question]}
+        Is the given answer correct? Reply only with 'true' or 'false'.
+        """
+        return self._ask_ai(prompt).lower().strip() == "true"
+
+    def cot_find_error(self, question: str, solution: str) -> int:
+        if self.cot_check(question, solution):
+            return -1
+
+        sentences = [s.strip() for s in solution.replace('。', '.').split('.') if s.strip()]
+        left, right = 0, len(sentences)
+
+        while left < right:
+            mid = (left + right) // 2
+            partial = '. '.join(sentences[:mid]) + '.'
+            if self.cot_check(question, partial):
+                left = mid + 1
+            else:
+                right = mid
+
+        return left
+
+    def cot_improve(self, question: str, current: str) -> str:
+        best_solution = current
+        best_score = self._rate_solution(question, current)
+        attempts = 0
+
+        while attempts < self.max_attempts:
+            attempts += 1
+            new_solution = self.cot_generate(question, current)
+            new_score = self._rate_solution(question, new_solution)
+
+            if new_score > best_score:
+                best_solution = new_solution
+                best_score = new_score
+
+            if best_score > 0.8:
+                break
+
+        return best_solution
+
+    def _rate_solution(self, question: str, solution: str) -> float:
+        prompt = f"""
+        Rate this solution from 0 to 1:
+        Question: {question}
+        Solution: {solution}
+        Correct Answer: {self.qa_pairs.get(question, '')}
+        Return only a number between 0 and 1.
+        """
+        try:
+            score = float(self._ask_ai(prompt))
+            return min(max(score, 0), 1)
+        except:
+            return 0.0
+
+    def cot_run(self, question: str) -> str:
+        """Run COT generation for a single question."""
+        solution = self.cot_generate(question)
+        if self.cot_check(question, solution):
+            return solution
+
+        solution = self.cot_improve(question, solution)
+
+        error_pos = self.cot_find_error(question, solution)
+        if error_pos != -1:
+            correct_part = '. '.join(solution.split('. ')[:error_pos]) + '.'
+            solution = self.cot_generate(question, correct_part)
+
+        self.solutions[question] = {
+            "solution": solution,
+            "error_position": error_pos,
+        }
+        return solution
+
+    def cot_load_answers(self, filepath: str) -> bool:
+        try:
+            with open(filepath, 'r', encoding='utf-8') as f:
+                data = json.load(f)
+            self.qa_pairs.update(data)
+            return True
+        except Exception as e:
+            print(f"Error loading answers: {e}")
+            return False
+
+    def _is_qa_pairs(self, qa_pairs: Any) -> bool:
+        """Validate if input is a proper QA pairs dictionary."""
+        if not qa_pairs:
+            return True  # None or empty is valid
+        if not isinstance(qa_pairs, dict):
+            raise ValueError("qa_pairs must be a dictionary with questions as keys and answers as values")
+        return True
+
+    def cot_append_solutions_with_qa_pairs(
+        self,
+        filepath: str = 'solutions.json',
+        qa_pairs: Optional[Dict[str, str]] = None
+    ) -> None:
+        """Appends current solutions to existing file or creates a new one."""
+        try:
+            self._is_qa_pairs(qa_pairs)  # Validate format
+            if qa_pairs:
+                self.qa_pairs.update(qa_pairs)
+
+            data = {
+                "solutions": self.solutions,
+                "qa_pairs": self.qa_pairs,
+                "saved_at": datetime.now().isoformat()
+            }
+
+            existing_data = {}
+            try:
+                with open(filepath, 'r', encoding='utf-8') as f:
+                    existing_data = json.load(f)
+            except (FileNotFoundError, json.JSONDecodeError):
+                pass
+
+            if existing_data:
+                existing_data["solutions"].update(data["solutions"])
+                existing_data["qa_pairs"].update(data["qa_pairs"])
+                existing_data["saved_at"] = data["saved_at"]
+                data = existing_data
+
+            with open(filepath, 'w', encoding='utf-8') as f:
+                json.dump(data, f, ensure_ascii=False, indent=2)
+        except Exception as e:
+            print(f"Error appending solutions: {e}")
+
+    def cot_save_solutions_with_qa_pairs(
+        self,
+        filepath: str = 'solutions.json',
+        append: bool = False,
+        qa_pairs: Optional[Dict[str, str]] = None
+    ) -> None:
+        try:
+            self._is_qa_pairs(qa_pairs)  # Validate format
+            if qa_pairs:
+                self.qa_pairs.update(qa_pairs)
+
+            if append:
+                self.cot_append_solutions_with_qa_pairs(filepath)
+                return
+
+            data = {
+                "solutions": self.solutions,
+                "qa_pairs": self.qa_pairs,
+                "saved_at": datetime.now().isoformat()
+            }
+            with open(filepath, 'w', encoding='utf-8') as f:
+                json.dump(data, f, ensure_ascii=False, indent=2)
+        except Exception as e:
+            print(f"Error saving solutions: {e}")
+
+    def cot_generate_dict(self, question: str, context: str = "") -> dict:
+        prompt = self._build_solution_prompt(question, context)
+        thought_process = self._ask_ai(prompt)
+
+        final_answer_prompt = f"""
+        Based on this solution, what is the final answer only:
+        {thought_process}
+        Give only the final answer, no explanation.
+        """
+        final_answer = self._ask_ai(final_answer_prompt)
+        return {
+            "thought_process": thought_process,
+            "final_answer": final_answer
+        }
+
+    def cot_improve_dict(self, question: str, current_solution: str) -> dict:
+        """
+        Improves the existing solution (text form), returning the best dictionary-based version.
+        """
+        best_solution = {
+            "thought_process": current_solution,
+            "final_answer": current_solution
+        }
+        best_score = self._rate_solution(question, current_solution)
+        attempts = 0
+
+        while attempts < self.max_attempts:
+            attempts += 1
+            new_solution = self.cot_generate_dict(question, current_solution)
+            new_score = self._rate_solution(question, new_solution["thought_process"])
+            if new_score > best_score:
+                best_solution = new_solution
+                best_score = new_score
+            if best_score > 0.8:
+                break
+        return best_solution
+
+    def cot_run_dict(self, question: str) -> dict:
+        """Uses the dictionary-based solution approach, storing the final solution in self.solutions."""
+        solution = self.cot_generate_dict(question)
+        if self.cot_check(question, solution["final_answer"]):
+            self.solutions[question] = solution
+            return solution
+
+        improved = self.cot_improve_dict(question, solution["thought_process"])
+        if self.cot_check(question, improved["final_answer"]):
+            self.solutions[question] = improved
+            return improved
+
+        error_pos = self.cot_find_error(question, improved["thought_process"])
+        if error_pos != -1:
+            partial_solution = '. '.join(improved["thought_process"].split('. ')[:error_pos]) + '.'
+            final = self.cot_generate_dict(question, partial_solution)
+            self.solutions[question] = final
+            return final
+
+        self.solutions[question] = improved
+        return improved
+
+    def cot_export_json_with_qa_pairs(
+        self,
+        filepath: str = 'dataset.json',
+        save_to_file: bool = True,
+        qa_pairs: Optional[Dict[str, str]] = None
+    ) -> Union[str, list]:
+        """
+        Export solutions in Alpaca training format with their full thought process.
+        """
+        try:
+            self._is_qa_pairs(qa_pairs)  # Validate format
+            if qa_pairs:
+                self.qa_pairs.update(qa_pairs)
+                # Generate solutions if empty
+                if not self.solutions:
+                    for question in qa_pairs:
+                        self.cot_run_dict(question)
+
+            alpaca_data = []
+            for question, sol in self.solutions.items():
+                alpaca_data.append({
+                    "instruction": question,
+                    "input": "",
+                    "output": sol.get("thought_process", "")
+                })
+
+            if not save_to_file:
+                return alpaca_data
+
+            with open(filepath, 'w', encoding='utf-8') as f:
+                json.dump(alpaca_data, f, ensure_ascii=False, indent=2)
+            return filepath
+        except Exception as e:
+            print(f"Error exporting to Alpaca format: {e}")
+            return None
+
+    def cot_export_csv_with_qa_pairs(
+        self,
+        filepath: str = 'dataset.csv',
+        qa_pairs: Optional[Dict[str, str]] = None
+    ) -> Optional[str]:
+        """Export solutions in CSV format."""
+        try:
+            self._is_qa_pairs(qa_pairs)  # Validate format
+            if qa_pairs:
+                self.qa_pairs.update(qa_pairs)
+                # Generate solutions if empty
+                if not self.solutions:
+                    for question in qa_pairs:
+                        self.cot_run_dict(question)
+
+            with open(filepath, 'w', newline='', encoding='utf-8') as f:
+                writer = csv.writer(f)
+                writer.writerow(['instruction', 'input', 'output'])
+                for question, sol in self.solutions.items():
+                    writer.writerow([question, '', sol.get("thought_process", "")])
+            return filepath
+        except Exception as e:
+            print(f"Error exporting to CSV format: {e}")
+            return None
+
+    def cot_save(
+        self,
+        question: str,
+        answer: str,
+        filepath: str = 'dataset.csv'
+    ) -> Optional[str]:
+        """
+        Save a single question-answer pair with chain of thought to CSV file.
+        Creates file with headers if it doesn't exist.
+        """
+        try:
+            # Add the current QA pair to self.qa_pairs
+            self.qa_pairs[question] = answer
+
+            # Generate solution
+            solution = self.cot_run_dict(question)
+
+            import csv
+            import os
+            file_exists = os.path.exists(filepath)
+
+            with open(filepath, 'a', newline='', encoding='utf-8') as f:
+                writer = csv.writer(f)
+                if not file_exists:
+                    writer.writerow(['instruction', 'input', 'output'])
+                writer.writerow([question, '', solution.get("thought_process", "")])
+            return filepath
+        except Exception as e:
+            print(f"Error appending to CSV: {e}")
+            return None
+
+    # Rename existing function to indicate it handles qa_pairs dictionary
+    def cot_append_csv_with_qa_pairs(
+        self,
+        filepath: str = 'dataset.csv',
+        qa_pairs: Optional[Dict[str, str]] = None
+    ) -> Optional[str]:
+        """Append solutions to CSV file using qa_pairs dictionary."""
+        try:
+            self._is_qa_pairs(qa_pairs)  # Validate format
+            if qa_pairs:
+                self.qa_pairs.update(qa_pairs)
+
+            import csv
+            import os
+            file_exists = os.path.exists(filepath)
+
+            with open(filepath, 'a', newline='', encoding='utf-8') as f:
+                writer = csv.writer(f)
+                if not file_exists:
+                    writer.writerow(['instruction', 'input', 'output'])
+
+                for question, sol in self.solutions.items():
+                    writer.writerow([question, '', sol.get("thought_process", "")])
+            return filepath
+        except Exception as e:
+            print(f"Error appending to CSV: {e}")
+            return None
+
+    def cot_upload_to_huggingface(
+        self,
+        huggingface_username: str,
+        dataset_name: str,
+        filepath: str,
+        private: bool = False
+    ) -> str:
+        """Upload generated solutions to HuggingFace datasets."""
+        try:
+            from datasets import Dataset
+            from huggingface_hub import HfApi, login
+            import pandas as pd
+
+            # Determine file type and load data
+            if filepath.endswith('.csv'):
+                data = pd.read_csv(filepath)
+            elif filepath.endswith('.json'):
+                data = pd.read_json(filepath)
+            else:
+                raise ValueError("Only CSV and JSON files are supported")
+
+            # Convert to HuggingFace dataset
+            dataset = Dataset.from_pandas(data)
+
+            # Upload to HuggingFace
+            repo_id = f"{huggingface_username}/{dataset_name}"
+            dataset.push_to_hub(
+                repo_id,
+                private=private
+            )
+
+            return f"Dataset uploaded successfully to {repo_id}"
+
+        except Exception as e:
+            print(f"Error uploading to HuggingFace: {e}")
+            return None
+
+# Usage example:
+if __name__ == "__main__":
+    # Direct QA Pairs Export Example
+    print("\n=== Direct QA Pairs Export Example ===")
+    direct_qa_data = {
+        "Number of r's in the word strawberry": "3"
+    }
+
+    direct_generator = GenerateCOT()
+
+    # Export with qa_pairs passed directly to functions
+    direct_generator.cot_export_csv_with_qa_pairs(
+        filepath='direct_solutions.csv',
+        qa_pairs=direct_qa_data
+    )
+
+    # Example of using cot_save for a single QA pair
+    direct_generator.cot_save(
+        question="What is the capital of France?",
+        answer="Paris",
+        filepath="single_qa.csv"
+    )
+
+
+
+    # Upload to HuggingFace
+    direct_generator.cot_upload_to_huggingface(
+        huggingface_username="mervinpraison",
+        dataset_name="cot-test",
+        filepath="single_qa.csv"
+    )
+
+    # direct_generator.cot_export_json_with_qa_pairs(
+    #     filepath='direct_solutions.json',
+    #     qa_pairs=direct_qa_data
+    # )
+
+    # # Rest of the original examples...
+    # qa_data = {
+    #     "What is 2+2?": "4",
+    #     "How many letters in 'hello'?": "5"
+    # }
+
+    # generator = GenerateCOT(qa_pairs=qa_data)
+    # for question in qa_data:
+    #     solution = generator.cot_run(question)
+    #     print(f"Question: {question}")
+    #     print(f"Solution: {solution}\n")
+    # answer = generator.cot_run("What is 2+2?")
+    # print(answer)
+
+    # # Additional QA data processing example
+    # print("\n=== Processing Additional QA Data ===")
+    # extra_qa_data = {
+
+    #     "What is 5 * 3?": "15"
+    # }
+
+    # # Create separate generator for additional data
+    # extra_generator = GenerateCOT(qa_pairs=extra_qa_data)
+
+    # # Process and save solutions
+    # for question in extra_qa_data:
+    #     solution = extra_generator.cot_run_dict(question)
+    #     print(f"Processing extra question: {question}")
+
+    # # Save solutions separately
+    # extra_generator.cot_save_solutions_with_qa_pairs('extra_qa_solutions.json')
+
+    # # Export in Alpaca format
+    # extra_generator.cot_export_json_with_qa_pairs(filepath='extra_qa_alpaca.json', save_to_file=True)
+
+    # # Demonstrate loading saved data
+    # loaded_generator = GenerateCOT(qa_pairs={})
+    # loaded_generator.cot_load_answers('extra_qa_solutions.json')
+    # print("\nLoaded extra QA pairs:", loaded_generator.qa_pairs)
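A minimal end-to-end sketch of the new module: seed a QA dictionary, generate and verify chain-of-thought solutions, then persist them. An OpenAI-compatible API key is assumed to be configured for the underlying LLM:

```python
from praisonaiagents.tools.train.data.generatecot import GenerateCOT

qa_pairs = {"What is 2+2?": "4"}
gen = GenerateCOT(qa_pairs=qa_pairs, model="gpt-4o-mini")

for question in qa_pairs:
    gen.cot_run_dict(question)  # generate, check and store a solution dict

gen.cot_save_solutions_with_qa_pairs("solutions.json")     # solutions + QA pairs
gen.cot_export_json_with_qa_pairs(filepath="alpaca.json")  # Alpaca-style records
```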
{praisonaiagents-0.0.56.dist-info → praisonaiagents-0.0.58.dist-info}/RECORD
CHANGED
@@ -9,16 +9,16 @@ praisonaiagents/knowledge/__init__.py,sha256=xL1Eh-a3xsHyIcU4foOWF-JdWYIYBALJH9b
 praisonaiagents/knowledge/chunking.py,sha256=FzoNY0q8MkvG4gADqk4JcRhmH3lcEHbRdonDgitQa30,6624
 praisonaiagents/knowledge/knowledge.py,sha256=fQNREDiwdoisfIxJBLVkteXgq_8Gbypfc3UaZbxf5QY,13210
 praisonaiagents/llm/__init__.py,sha256=ttPQQJQq6Tah-0updoEXDZFKWtJAM93rBWRoIgxRWO8,689
-praisonaiagents/llm/llm.py,sha256=
+praisonaiagents/llm/llm.py,sha256=G2wKMwitWBJRS6nOq9W77zXtsxvJwsVwXFOKYcllY0E,51386
 praisonaiagents/memory/memory.py,sha256=I8dOTkrl1i-GgQbDcrFOsSruzJ7MiI6Ys37DK27wrUs,35537
 praisonaiagents/process/__init__.py,sha256=lkYbL7Hn5a0ldvJtkdH23vfIIZLIcanK-65C0MwaorY,52
-praisonaiagents/process/process.py,sha256=
+praisonaiagents/process/process.py,sha256=BrS8_4Gt2ewXt559hThJTSrXVYG8daabO9tGDaWmrm0,44906
 praisonaiagents/task/__init__.py,sha256=VL5hXVmyGjINb34AalxpBMl-YW9m5EDcRkMTKkSSl7c,80
 praisonaiagents/task/task.py,sha256=ikFjzNm4WPYONSLtWA3uDGNIUx_TvXTeU5SukWoC66E,14271
-praisonaiagents/tools/__init__.py,sha256
+praisonaiagents/tools/__init__.py,sha256=CWOYV9SudYY82r45LnNgaVRV3cmsAFdasNRkPrLsgmI,9198
 praisonaiagents/tools/arxiv_tools.py,sha256=1stb31zTjLTon4jCnpZG5de9rKc9QWgC0leLegvPXWo,10528
 praisonaiagents/tools/calculator_tools.py,sha256=S1xPT74Geurvjm52QMMIG29zDXVEWJmM6nmyY7yF298,9571
-praisonaiagents/tools/csv_tools.py,sha256=
+praisonaiagents/tools/csv_tools.py,sha256=4Yr0QYwBXt-1BDXGLalB2eSsFR2mB5rH3KdHmRBQY6E,10036
 praisonaiagents/tools/duckdb_tools.py,sha256=KB3b-1HcX7ocoxskDpk_7RRpTGHnH8hizIY0ZdLRbIE,8816
 praisonaiagents/tools/duckduckgo_tools.py,sha256=ynlB5ZyWfHYjUq0JZXH12TganqTihgD-2IyRgs32y84,1657
 praisonaiagents/tools/excel_tools.py,sha256=e2HqcwnyBueOyss0xEKxff3zB4w4sNWCOMXvZfbDYlE,11309
@@ -35,7 +35,8 @@ praisonaiagents/tools/wikipedia_tools.py,sha256=pGko-f33wqXgxJTv8db7TbizY5XnzBQR
 praisonaiagents/tools/xml_tools.py,sha256=iYTMBEk5l3L3ryQ1fkUnNVYK-Nnua2Kx2S0dxNMMs1A,17122
 praisonaiagents/tools/yaml_tools.py,sha256=uogAZrhXV9O7xvspAtcTfpKSQYL2nlOTvCQXN94-G9A,14215
 praisonaiagents/tools/yfinance_tools.py,sha256=s2PBj_1v7oQnOobo2fDbQBACEHl61ftG4beG6Z979ZE,8529
-praisonaiagents
-praisonaiagents-0.0.
-praisonaiagents-0.0.
-praisonaiagents-0.0.
+praisonaiagents/tools/train/data/generatecot.py,sha256=k1gZHtgY1poVp5kajhgs4S9a4-epdA8NyZfYTa34lQU,17651
+praisonaiagents-0.0.58.dist-info/METADATA,sha256=N_DIe_TPq4gXuySMSwVSPpsSarqo7h-_fTAIQaRFGT0,830
+praisonaiagents-0.0.58.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+praisonaiagents-0.0.58.dist-info/top_level.txt,sha256=_HsRddrJ23iDx5TTqVUVvXG2HeHBL5voshncAMDGjtA,16
+praisonaiagents-0.0.58.dist-info/RECORD,,
{praisonaiagents-0.0.56.dist-info → praisonaiagents-0.0.58.dist-info}/WHEEL
File without changes
{praisonaiagents-0.0.56.dist-info → praisonaiagents-0.0.58.dist-info}/top_level.txt
File without changes