camel-ai 0.2.69a7__py3-none-any.whl → 0.2.71a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- camel/__init__.py +1 -1
- camel/societies/role_playing.py +26 -28
- camel/societies/workforce/role_playing_worker.py +4 -4
- camel/societies/workforce/single_agent_worker.py +4 -4
- camel/societies/workforce/workforce.py +462 -159
- camel/societies/workforce/workforce_logger.py +37 -24
- camel/storages/__init__.py +2 -0
- camel/storages/vectordb_storages/__init__.py +2 -0
- camel/storages/vectordb_storages/pgvector.py +349 -0
- camel/tasks/task.py +83 -7
- camel/toolkits/file_write_toolkit.py +21 -7
- camel/toolkits/human_toolkit.py +23 -8
- camel/toolkits/non_visual_browser_toolkit/browser_non_visual_toolkit.py +23 -2
- camel/toolkits/non_visual_browser_toolkit/nv_browser_session.py +53 -11
- camel/toolkits/non_visual_browser_toolkit/snapshot.js +211 -131
- camel/toolkits/non_visual_browser_toolkit/snapshot.py +9 -8
- camel/toolkits/terminal_toolkit.py +28 -20
- camel/toolkits/video_download_toolkit.py +5 -1
- camel/types/enums.py +3 -0
- {camel_ai-0.2.69a7.dist-info → camel_ai-0.2.71a1.dist-info}/METADATA +5 -1
- {camel_ai-0.2.69a7.dist-info → camel_ai-0.2.71a1.dist-info}/RECORD +23 -22
- {camel_ai-0.2.69a7.dist-info → camel_ai-0.2.71a1.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.69a7.dist-info → camel_ai-0.2.71a1.dist-info}/licenses/LICENSE +0 -0

camel/societies/workforce/workforce_logger.py
CHANGED
@@ -488,7 +488,6 @@ class WorkforceLogger:
             'worker_utilization': {},
             'current_pending_tasks': 0,
             'total_workforce_running_time_seconds': 0.0,
-            'avg_task_queue_time_seconds': 0.0,
         }

         task_start_times: Dict[str, float] = {}
@@ -499,54 +498,68 @@

         tasks_handled_by_worker: Dict[str, int] = {}

+        # Helper function to check if a task is the main task (has no parent)
+        def is_main_task(task_id: str) -> bool:
+            return (
+                task_id in self._task_hierarchy
+                and self._task_hierarchy[task_id].get('parent') is None
+            )
+
         for entry in self.log_entries:
             event_type = entry['event_type']
             timestamp = datetime.fromisoformat(entry['timestamp'])
+            task_id = entry.get('task_id', '')
+
             if first_timestamp is None or timestamp < first_timestamp:
                 first_timestamp = timestamp
             if last_timestamp is None or timestamp > last_timestamp:
                 last_timestamp = timestamp

             if event_type == 'task_created':
-
-
+                # Exclude main task from total count
+                if not is_main_task(task_id):
+                    kpis['total_tasks_created'] += 1
+                    task_creation_timestamps[task_id] = timestamp
             elif event_type == 'task_assigned':
-                task_assignment_timestamps[
+                task_assignment_timestamps[task_id] = timestamp
                 # Queue time tracking has been removed

             elif event_type == 'task_started':
                 # Store start time for processing time calculation
-                task_start_times[
+                task_start_times[task_id] = timestamp.timestamp()

             elif event_type == 'task_completed':
-
-
-
-
-
-
-
+                # Exclude main task from total count
+                if not is_main_task(task_id):
+                    kpis['total_tasks_completed'] += 1
+                    # Count tasks handled by worker (only for non-main tasks)
+                    if 'worker_id' in entry and entry['worker_id'] is not None:
+                        worker_id = entry['worker_id']
+                        tasks_handled_by_worker[worker_id] = (
+                            tasks_handled_by_worker.get(worker_id, 0) + 1
+                        )

-                if
+                if task_id in task_assignment_timestamps:
                     completion_time = (
-                        timestamp
-                        - task_assignment_timestamps[entry['task_id']]
+                        timestamp - task_assignment_timestamps[task_id]
                     ).total_seconds()
                     # Store completion time in task hierarchy instead of KPIs
                     # array
-                    if
-                        self._task_hierarchy[
+                    if task_id in self._task_hierarchy:
+                        self._task_hierarchy[task_id][
                             'completion_time_seconds'
                         ] = completion_time

             elif event_type == 'task_failed':
-
-
-
-
-
-
-
+                # Exclude main task from total count
+                if not is_main_task(task_id):
+                    kpis['total_tasks_failed'] += 1
+                    # Count tasks handled by worker (only for non-main tasks)
+                    if 'worker_id' in entry and entry['worker_id'] is not None:
+                        worker_id = entry['worker_id']
+                        tasks_handled_by_worker[worker_id] = (
+                            tasks_handled_by_worker.get(worker_id, 0) + 1
+                        )
                 error_type = entry['error_type']
                 kpis['error_types_count'][error_type] = (
                     kpis['error_types_count'].get(error_type, 0) + 1
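
Net effect of this hunk: the root task, i.e. the entry in `self._task_hierarchy` with no parent, no longer inflates the created/completed/failed counters, and per-worker counts accumulate only for subtasks. A minimal standalone sketch of the exclusion logic; the hierarchy and event data below are made up for illustration, not taken from the package:

# Illustrative sketch of the is_main_task exclusion; sample data only.
hierarchy = {
    "0": {"parent": None},   # root (main) task
    "0.1": {"parent": "0"},  # subtask
    "0.2": {"parent": "0"},  # subtask
}

def is_main_task(task_id: str) -> bool:
    # Mirrors the helper added in the diff, applied to the sample hierarchy.
    return task_id in hierarchy and hierarchy[task_id].get("parent") is None

created_events = ["0", "0.1", "0.2"]  # task_ids from 'task_created' entries
total_tasks_created = sum(1 for t in created_events if not is_main_task(t))
assert total_tasks_created == 2  # the root task "0" no longer counts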
camel/storages/__init__.py
CHANGED
@@ -30,6 +30,7 @@ from .vectordb_storages.chroma import ChromaStorage
 from .vectordb_storages.faiss import FaissStorage
 from .vectordb_storages.milvus import MilvusStorage
 from .vectordb_storages.oceanbase import OceanBaseStorage
+from .vectordb_storages.pgvector import PgVectorStorage
 from .vectordb_storages.qdrant import QdrantStorage
 from .vectordb_storages.tidb import TiDBStorage
 from .vectordb_storages.weaviate import WeaviateStorage
@@ -53,5 +54,6 @@ __all__ = [
     'Mem0Storage',
     'OceanBaseStorage',
     'WeaviateStorage',
+    'PgVectorStorage',
     'ChromaStorage',
 ]

camel/storages/vectordb_storages/__init__.py
CHANGED
@@ -23,6 +23,7 @@ from .chroma import ChromaStorage
 from .faiss import FaissStorage
 from .milvus import MilvusStorage
 from .oceanbase import OceanBaseStorage
+from .pgvector import PgVectorStorage
 from .qdrant import QdrantStorage
 from .tidb import TiDBStorage
 from .weaviate import WeaviateStorage
@@ -40,4 +41,5 @@ __all__ = [
     'WeaviateStorage',
     'VectorRecord',
     'VectorDBStatus',
+    'PgVectorStorage',
 ]
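
With both re-exports in place, the new backend is importable from either level; for example:

# Either import path resolves to the same class after this change.
from camel.storages import PgVectorStorage
# or: from camel.storages.vectordb_storages import PgVectorStorage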

camel/storages/vectordb_storages/pgvector.py
ADDED
@@ -0,0 +1,349 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+import json
+from typing import Any, Dict, List, Optional
+
+from camel.logger import get_logger
+from camel.storages.vectordb_storages import (
+    BaseVectorStorage,
+    VectorDBQuery,
+    VectorDBQueryResult,
+    VectorDBStatus,
+    VectorRecord,
+)
+from camel.types import VectorDistance
+from camel.utils import dependencies_required
+
+logger = get_logger(__name__)
+
+
+class PgVectorStorage(BaseVectorStorage):
+    r"""PgVectorStorage is an implementation of BaseVectorStorage for
+    PostgreSQL with pgvector extension.
+
+    This class provides methods to add, delete, query, and manage vector
+    records in a PostgreSQL database using the pgvector extension.
+    It supports different distance metrics for similarity search.
+
+    Args:
+        vector_dim (int): The dimension of the vectors to be stored.
+        conn_info (Dict[str, Any]): Connection information for
+            psycopg.connect.
+        table_name (str, optional): Name of the table to store vectors.
+            (default: :obj:`None`)
+        distance (VectorDistance, optional): Distance metric for vector
+            comparison. (default: :obj:`VectorDistance.COSINE`)
+    """
+
+    @dependencies_required('psycopg', 'pgvector')
+    def __init__(
+        self,
+        vector_dim: int,
+        conn_info: Dict[str, Any],
+        table_name: Optional[str] = None,
+        distance: VectorDistance = VectorDistance.COSINE,
+        **kwargs: Any,
+    ) -> None:
+        r"""Initialize PgVectorStorage.
+
+        Args:
+            vector_dim (int): The dimension of the vectors.
+            conn_info (Dict[str, Any]): Connection info for psycopg.connect.
+            table_name (str, optional): Table name. (default: :obj:`None`)
+            distance (VectorDistance, optional): Distance metric.
+                (default: :obj:`VectorDistance.COSINE`)
+        """
+        import psycopg
+        from pgvector.psycopg import register_vector
+
+        if vector_dim <= 0:
+            raise ValueError("vector_dim must be positive")
+
+        self.vector_dim = vector_dim
+        self.conn_info = conn_info
+        self.table_name = table_name or 'vectors'
+        self.distance = distance
+
+        try:
+            self._conn = psycopg.connect(**conn_info)
+            register_vector(self._conn)
+            self._ensure_table()
+            self._ensure_index()
+        except Exception as e:
+            logger.error(f"Failed to initialize PgVectorStorage: {e}")
+            raise
+
+    def _ensure_table(self) -> None:
+        r"""Ensure the vector table exists in the database.
+        Creates the table if it does not exist.
+        """
+        try:
+            from psycopg.sql import SQL, Identifier, Literal
+
+            with self._conn.cursor() as cur:
+                query = SQL("""
+                    CREATE TABLE IF NOT EXISTS {table} (
+                        id VARCHAR PRIMARY KEY,
+                        vector vector({dim}),
+                        payload JSONB
+                    )
+                """).format(
+                    table=Identifier(self.table_name),
+                    dim=Literal(self.vector_dim),
+                )
+                cur.execute(query)
+            self._conn.commit()
+        except Exception as e:
+            logger.error(f"Failed to create table {self.table_name}: {e}")
+            raise
+
+    def _ensure_index(self) -> None:
+        r"""Ensure vector similarity search index exists for better
+        performance.
+        """
+        try:
+            from psycopg.sql import SQL, Identifier
+
+            with self._conn.cursor() as cur:
+                index_name = f"{self.table_name}_vector_idx"
+                query = SQL("""
+                    CREATE INDEX IF NOT EXISTS {index_name}
+                    ON {table}
+                    USING hnsw (vector vector_cosine_ops)
+                """).format(
+                    index_name=Identifier(index_name),
+                    table=Identifier(self.table_name),
+                )
+                cur.execute(query)
+            self._conn.commit()
+        except Exception as e:
+            logger.warning(f"Failed to create vector index: {e}")
+
+    def add(self, records: List[VectorRecord], **kwargs: Any) -> None:
+        r"""Add or update vector records in the database.
+
+        Args:
+            records (List[VectorRecord]): List of vector records to
+                add or update.
+        """
+        if not records:
+            return
+
+        try:
+            with self._conn.cursor() as cur:
+                # Use batch insert for better performance
+                batch_data = []
+                for rec in records:
+                    if len(rec.vector) != self.vector_dim:
+                        raise ValueError(
+                            f"Vector dimension mismatch: expected "
+                            f"{self.vector_dim}, got {len(rec.vector)}"
+                        )
+
+                    batch_data.append(
+                        (
+                            rec.id,
+                            rec.vector,
+                            json.dumps(rec.payload)
+                            if rec.payload is not None
+                            else None,
+                        )
+                    )
+
+                # Use executemany for efficient batch insert
+                from psycopg.sql import SQL, Identifier
+
+                query = SQL("""
+                    INSERT INTO {table} (id, vector, payload)
+                    VALUES (%s, %s, %s)
+                    ON CONFLICT (id) DO UPDATE SET
+                        vector=EXCLUDED.vector,
+                        payload=EXCLUDED.payload
+                """).format(table=Identifier(self.table_name))
+
+                cur.executemany(query, batch_data)
+            self._conn.commit()
+        except Exception as e:
+            self._conn.rollback()
+            logger.error(f"Failed to add records: {e}")
+            raise
+
+    def delete(self, ids: List[str], **kwargs: Any) -> None:
+        r"""Delete vector records from the database by their IDs.
+
+        Args:
+            ids (List[str]): List of record IDs to delete.
+        """
+        from psycopg.sql import SQL, Identifier
+
+        if not ids:
+            return
+
+        try:
+            with self._conn.cursor() as cur:
+                query = SQL("DELETE FROM {table} WHERE id = ANY(%s)").format(
+                    table=Identifier(self.table_name)
+                )
+                cur.execute(query, (ids,))
+            self._conn.commit()
+        except Exception as e:
+            self._conn.rollback()
+            logger.error(f"Failed to delete records: {e}")
+            raise
+
+    def query(
+        self, query: VectorDBQuery, **kwargs: Any
+    ) -> List[VectorDBQueryResult]:
+        r"""Query the database for the most similar vectors to the given
+        query vector.
+
+        Args:
+            query (VectorDBQuery): Query object containing the query
+                vector and top_k.
+            **kwargs (Any): Additional keyword arguments for the query.
+
+        Returns:
+            List[VectorDBQueryResult]: List of query results sorted by
+                similarity.
+        """
+        if len(query.query_vector) != self.vector_dim:
+            raise ValueError(
+                f"Query vector dimension mismatch: "
+                f"expected {self.vector_dim}, got {len(query.query_vector)}"
+            )
+
+        try:
+            with self._conn.cursor() as cur:
+                # Fix distance metric mapping
+                metric_info = {
+                    VectorDistance.COSINE: ('<=>', 'ASC'),  # Cosine distance
+                    VectorDistance.EUCLIDEAN: (
+                        '<->',
+                        'ASC',
+                    ),  # Euclidean distance
+                    VectorDistance.DOT: (
+                        '<#>',
+                        'DESC',
+                    ),  # Negative dot product (higher is better)
+                }
+
+                if self.distance not in metric_info:
+                    raise ValueError(
+                        f"Unsupported distance metric: {self.distance}"
+                    )
+
+                metric, order = metric_info[self.distance]
+
+                from psycopg.sql import SQL, Identifier, Literal
+
+                query_sql = SQL("""
+                    SELECT id, vector, payload, (vector {} %s::vector)
+                    AS similarity
+                    FROM {}
+                    ORDER BY similarity {}
+                    LIMIT %s
+                """).format(
+                    Literal(metric),
+                    Identifier(self.table_name),
+                    Literal(order),
+                )
+
+                cur.execute(query_sql, (query.query_vector, query.top_k))
+                results = []
+                for row in cur.fetchall():
+                    id, vector, payload, similarity = row
+                    results.append(
+                        VectorDBQueryResult.create(
+                            similarity=float(similarity),
+                            vector=list(vector),
+                            id=id,
+                            payload=payload,
+                        )
+                    )
+                return results
+        except Exception as e:
+            logger.error(f"Failed to query vectors: {e}")
+            raise
+
+    def status(self, **kwargs: Any) -> VectorDBStatus:
+        r"""Get the status of the vector database, including vector
+        dimension and count.
+
+        Args:
+            **kwargs (Any): Additional keyword arguments for the query.
+
+        Returns:
+            VectorDBStatus: Status object with vector dimension and count.
+        """
+        try:
+            with self._conn.cursor() as cur:
+                from psycopg.sql import SQL, Identifier
+
+                query = SQL('SELECT COUNT(*) FROM {}').format(
+                    Identifier(self.table_name)
+                )
+                cur.execute(query)
+                result = cur.fetchone()
+                count = result[0] if result else 0
+                return VectorDBStatus(
+                    vector_dim=self.vector_dim, vector_count=count
+                )
+        except Exception as e:
+            logger.error(f"Failed to get status: {e}")
+            raise
+
+    def clear(self) -> None:
+        r"""Remove all vectors from the storage by truncating the table."""
+        try:
+            with self._conn.cursor() as cur:
+                from psycopg.sql import SQL, Identifier
+
+                query = SQL("TRUNCATE TABLE {table}").format(
+                    table=Identifier(self.table_name)
+                )
+                cur.execute(query)
+            self._conn.commit()
+        except Exception as e:
+            self._conn.rollback()
+            logger.error(f"Failed to clear table: {e}")
+            raise
+
+    def load(self) -> None:
+        r"""Load the collection hosted on cloud service (no-op for pgvector).
+        This method is provided for interface compatibility.
+        """
+        # For PostgreSQL local/managed instances, no loading is required
+        pass
+
+    def close(self) -> None:
+        r"""Close the database connection."""
+        if hasattr(self, '_conn') and self._conn:
+            try:
+                self._conn.close()
+            except Exception as e:
+                logger.warning(f"Error closing connection: {e}")
+
+    def __del__(self) -> None:
+        r"""Ensure connection is closed when object is destroyed."""
+        self.close()
+
+    @property
+    def client(self) -> Any:
+        r"""Provides access to the underlying vector database client.
+
+        Returns:
+            Any: The underlying psycopg connection object.
+        """
+        return self._conn
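
A minimal usage sketch of the new backend, assuming a reachable PostgreSQL server with the pgvector extension enabled. The connection parameters, table name, and 4-dimensional vectors below are illustrative placeholders, not values from the package:

from camel.storages import PgVectorStorage
from camel.storages.vectordb_storages import VectorDBQuery, VectorRecord

# Placeholder connection settings; adjust for your own database.
storage = PgVectorStorage(
    vector_dim=4,
    conn_info={
        "host": "localhost",
        "port": 5432,
        "user": "postgres",
        "password": "postgres",
        "dbname": "camel",
    },
    table_name="demo_vectors",
)

# Records are upserted by id (see the ON CONFLICT clause above).
storage.add([
    VectorRecord(vector=[0.1, 0.2, 0.3, 0.4], payload={"label": "a"}),
    VectorRecord(vector=[0.4, 0.3, 0.2, 0.1], payload={"label": "b"}),
])

# Cosine distance is the default metric; top_k bounds the result count.
results = storage.query(
    VectorDBQuery(query_vector=[0.1, 0.2, 0.3, 0.4], top_k=1)
)
for r in results:
    print(r.record.id, r.similarity)

storage.close()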
camel/tasks/task.py
CHANGED
@@ -46,19 +46,35 @@ from .task_prompt import (
 logger = get_logger(__name__)


+class TaskValidationMode(Enum):
+    r"""Validation modes for different use cases."""
+
+    INPUT = "input"  # For validating task content before processing
+    OUTPUT = "output"  # For validating task results after completion
+
+
 def validate_task_content(
-    content: str,
+    content: str,
+    task_id: str = "unknown",
+    min_length: int = 5,
+    mode: TaskValidationMode = TaskValidationMode.INPUT,
+    check_failure_patterns: bool = True,
 ) -> bool:
-    r"""
-
-
+    r"""Unified validation for task content and results to avoid silent
+    failures. Performs comprehensive checks to ensure content meets quality
+    standards.

     Args:
-        content (str): The task result
+        content (str): The task content or result to validate.
         task_id (str): Task ID for logging purposes.
             (default: :obj:`"unknown"`)
         min_length (int): Minimum content length after stripping whitespace.
-            (default: :obj:`
+            (default: :obj:`5`)
+        mode (TaskValidationMode): Validation mode - INPUT for task content,
+            OUTPUT for task results. (default: :obj:`TaskValidationMode.INPUT`)
+        check_failure_patterns (bool): Whether to check for failure indicators
+            in the content. Only effective in OUTPUT mode.
+            (default: :obj:`True`)

     Returns:
         bool: True if content passes validation, False otherwise.
@@ -85,14 +101,70 @@ def validate_task_content(
         )
         return False

+    # 4: For OUTPUT mode, check for failure patterns if enabled
+    if mode == TaskValidationMode.OUTPUT and check_failure_patterns:
+        content_lower = stripped_content.lower()
+
+        # Check for explicit failure indicators
+        failure_indicators = [
+            "i cannot complete",
+            "i cannot do",
+            "task failed",
+            "unable to complete",
+            "cannot be completed",
+            "failed to complete",
+            "i cannot",
+            "not possible",
+            "impossible to",
+            "cannot perform",
+        ]
+
+        if any(indicator in content_lower for indicator in failure_indicators):
+            logger.warning(
+                f"Task {task_id}: Failure indicator detected in result. "
+                f"Content preview: '{stripped_content[:100]}...'"
+            )
+            return False
+
+        # Check for responses that are just error messages or refusals
+        if content_lower.startswith(("error", "failed", "cannot", "unable")):
+            logger.warning(
+                f"Task {task_id}: Error/refusal pattern detected at start. "
+                f"Content preview: '{stripped_content[:100]}...'"
+            )
+            return False
+
     # All validation checks passed
     logger.debug(
-        f"Task {task_id}:
+        f"Task {task_id}: {mode.value} validation passed "
        f"({len(stripped_content)} chars)"
     )
     return True


+def is_task_result_insufficient(task: "Task") -> bool:
+    r"""Check if a task result is insufficient and should be treated as failed.
+
+    This is a convenience wrapper around validate_task_content for backward
+    compatibility and semantic clarity when checking task results.
+
+    Args:
+        task (Task): The task to check.
+
+    Returns:
+        bool: True if the result is insufficient, False otherwise.
+    """
+    if not hasattr(task, 'result') or task.result is None:
+        return True
+
+    return not validate_task_content(
+        content=task.result,
+        task_id=task.id,
+        mode=TaskValidationMode.OUTPUT,
+        check_failure_patterns=True,
+    )
+
+
 def parse_response(
     response: str, task_id: Optional[str] = None
 ) -> List["Task"]:
@@ -157,6 +229,8 @@ class Task(BaseModel):
             (default: :obj:`""`)
         failure_count (int): The failure count for the task.
             (default: :obj:`0`)
+        assigned_worker_id (Optional[str]): The ID of the worker assigned to
+            this task. (default: :obj:`None`)
         additional_info (Optional[Dict[str, Any]]): Additional information for
             the task. (default: :obj:`None`)
         image_list (Optional[List[Image.Image]]): Optional list of PIL Image
@@ -187,6 +261,8 @@ class Task(BaseModel):

     failure_count: int = 0

+    assigned_worker_id: Optional[str] = None
+
     additional_info: Optional[Dict[str, Any]] = None

     image_list: Optional[List[Image.Image]] = None

camel/toolkits/file_write_toolkit.py
CHANGED
@@ -176,26 +176,40 @@ class FileWriteToolkit(BaseToolkit):

         doc = Document(documentclass="article")
         doc.packages.append(Command('usepackage', 'amsmath'))
-
         with doc.create(Section('Generated Content')):
             for line in content.split('\n'):
-                # Remove leading whitespace
                 stripped_line = line.strip()
-
-                #
+
+                # Skip empty lines
+                if not stripped_line:
+                    continue
+
+                # Convert Markdown-like headers
+                if stripped_line.startswith('## '):
+                    header = stripped_line[3:]
+                    doc.append(NoEscape(r'\subsection*{%s}' % header))
+                    continue
+                elif stripped_line.startswith('# '):
+                    header = stripped_line[2:]
+                    doc.append(NoEscape(r'\section*{%s}' % header))
+                    continue
+                elif stripped_line.strip() == '---':
+                    doc.append(NoEscape(r'\hrule'))
+                    continue
+
+                # Detect standalone math expressions like $...$
                 if (
                     stripped_line.startswith('$')
                     and stripped_line.endswith('$')
                     and len(stripped_line) > 1
                 ):
-                    # Extract content between the '$' delimiters
                     math_data = stripped_line[1:-1]
                     doc.append(Math(data=math_data))
                 else:
-                    doc.append(NoEscape(
+                    doc.append(NoEscape(stripped_line))
                 doc.append(NoEscape(r'\par'))

-
+        doc.generate_pdf(str(file_path), clean_tex=True)

         logger.info(f"Wrote PDF (with LaTeX) to {file_path}")
     else: