@aj-archipelago/cortex 1.3.65 → 1.3.67
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/helper-apps/cortex-autogen2/Dockerfile +88 -21
- package/helper-apps/cortex-autogen2/docker-compose.yml +15 -8
- package/helper-apps/cortex-autogen2/host.json +5 -0
- package/helper-apps/cortex-autogen2/pyproject.toml +82 -25
- package/helper-apps/cortex-autogen2/requirements.txt +84 -14
- package/helper-apps/cortex-autogen2/services/redis_publisher.py +129 -3
- package/helper-apps/cortex-autogen2/task_processor.py +432 -116
- package/helper-apps/cortex-autogen2/tools/__init__.py +2 -0
- package/helper-apps/cortex-autogen2/tools/azure_blob_tools.py +32 -0
- package/helper-apps/cortex-autogen2/tools/azure_foundry_agents.py +50 -14
- package/helper-apps/cortex-autogen2/tools/file_tools.py +169 -44
- package/helper-apps/cortex-autogen2/tools/google_cse.py +117 -0
- package/helper-apps/cortex-autogen2/tools/search_tools.py +655 -98
- package/lib/entityConstants.js +1 -1
- package/lib/pathwayManager.js +42 -8
- package/lib/pathwayTools.js +3 -3
- package/lib/util.js +58 -2
- package/package.json +1 -1
- package/pathways/system/entity/memory/sys_memory_format.js +1 -0
- package/pathways/system/entity/memory/sys_memory_manager.js +3 -3
- package/pathways/system/entity/sys_entity_start.js +1 -1
- package/pathways/system/entity/tools/sys_tool_bing_search_afagent.js +2 -0
- package/pathways/system/entity/tools/sys_tool_codingagent.js +2 -2
- package/pathways/system/entity/tools/sys_tool_google_search.js +3 -3
- package/pathways/system/entity/tools/sys_tool_grok_x_search.js +12 -2
- package/pathways/system/workspaces/run_workspace_prompt.js +0 -3
- package/server/executeWorkspace.js +381 -0
- package/server/graphql.js +5 -180
- package/server/pathwayResolver.js +3 -3
- package/server/plugins/apptekTranslatePlugin.js +2 -2
- package/server/plugins/azureFoundryAgentsPlugin.js +1 -1
- package/tests/unit/core/parser.test.js +0 -1
- package/tests/unit/core/pathwayManagerWithFiles.test.js +256 -0
- package/tests/unit/graphql_executeWorkspace_transformation.test.js +244 -0
- package/tests/unit/server/graphql.test.js +122 -1
|
@@ -5,6 +5,7 @@ Contains various tool modules for agent capabilities.
|
|
|
5
5
|
|
|
6
6
|
from .search_tools import web_search, image_search, combined_search, fetch_webpage, collect_task_images
|
|
7
7
|
from .coding_tools import execute_code
|
|
8
|
+
from .google_cse import google_cse_search
|
|
8
9
|
from .azure_blob_tools import upload_file_to_azure_blob
|
|
9
10
|
from .file_tools import list_files_in_work_dir, read_file_from_work_dir, get_file_info, create_file, download_image
|
|
10
11
|
|
|
@@ -15,6 +16,7 @@ __all__ = [
|
|
|
15
16
|
"fetch_webpage",
|
|
16
17
|
"collect_task_images",
|
|
17
18
|
"execute_code",
|
|
19
|
+
"google_cse_search",
|
|
18
20
|
"upload_file_to_azure_blob",
|
|
19
21
|
"list_files_in_work_dir",
|
|
20
22
|
"read_file_from_work_dir",
|
|
@@ -8,13 +8,22 @@ import logging
|
|
|
8
8
|
import mimetypes
|
|
9
9
|
import uuid
|
|
10
10
|
import time
|
|
11
|
+
import hashlib
|
|
11
12
|
from datetime import datetime, timedelta
|
|
12
13
|
from urllib.parse import urlparse, parse_qs
|
|
13
14
|
from azure.storage.blob import BlobServiceClient, generate_blob_sas, BlobSasPermissions, ContentSettings
|
|
14
15
|
from azure.core.exceptions import AzureError, ServiceResponseError
|
|
16
|
+
import requests
|
|
15
17
|
|
|
16
18
|
logger = logging.getLogger(__name__)
|
|
17
19
|
|
|
20
|
+
# Ensure correct MIME types for Office files, especially PPT/PPTX, for proper downloads in browsers.
# Register them in the strict (official) table: entries added with strict=False are only
# consulted by guess_type(..., strict=False), so a plain guess_type(path) call would never
# see them. Strict entries are still found by non-strict lookups, so this is a superset.
# NOTE(review): assumes content types are resolved through mimetypes.guess_type - confirm at the call site.
try:
    mimetypes.add_type("application/vnd.openxmlformats-officedocument.presentationml.presentation", ".pptx")
    mimetypes.add_type("application/vnd.ms-powerpoint", ".ppt")
except Exception:
    # Best-effort: a failed registration must not prevent the module from importing,
    # but leave a trace instead of swallowing it silently.
    logging.getLogger(__name__).debug("Could not register Office MIME types", exc_info=True)
|
|
26
|
+
|
|
18
27
|
def _validate_sas_url(url: str) -> bool:
|
|
19
28
|
"""Private helper to validate an Azure blob SAS URL."""
|
|
20
29
|
try:
|
|
@@ -56,6 +65,8 @@ class AzureBlobUploader:
|
|
|
56
65
|
|
|
57
66
|
self.connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
|
|
58
67
|
self.container_name = os.getenv("AZURE_BLOB_CONTAINER", "autogentempfiles")
|
|
68
|
+
# In-memory deduplication cache: sha256 -> blob_name
|
|
69
|
+
self._sha256_to_blob: dict = {}
|
|
59
70
|
|
|
60
71
|
if not self.connection_string:
|
|
61
72
|
raise ValueError("AZURE_STORAGE_CONNECTION_STRING environment variable is required")
|
|
@@ -136,6 +147,22 @@ class AzureBlobUploader:
|
|
|
136
147
|
except Exception:
|
|
137
148
|
pass
|
|
138
149
|
|
|
150
|
+
# Compute sha256 to deduplicate repeat uploads during same process lifetime
|
|
151
|
+
sha256_hex = None
|
|
152
|
+
try:
|
|
153
|
+
hasher = hashlib.sha256()
|
|
154
|
+
with open(file_path, "rb") as fh:
|
|
155
|
+
for chunk in iter(lambda: fh.read(1024 * 1024), b""):
|
|
156
|
+
hasher.update(chunk)
|
|
157
|
+
sha256_hex = hasher.hexdigest()
|
|
158
|
+
if sha256_hex in self._sha256_to_blob:
|
|
159
|
+
# Return prior URL for identical content
|
|
160
|
+
prior_blob = self._sha256_to_blob[sha256_hex]
|
|
161
|
+
sas_url = self.generate_sas_url(prior_blob)
|
|
162
|
+
return {"blob_name": prior_blob, "download_url": sas_url, "deduplicated": True}
|
|
163
|
+
except Exception:
|
|
164
|
+
sha256_hex = None
|
|
165
|
+
|
|
139
166
|
# Simple upload; SDK will handle block uploads automatically for large blobs
|
|
140
167
|
with open(file_path, "rb") as data:
|
|
141
168
|
blob_client.upload_blob(
|
|
@@ -149,6 +176,11 @@ class AzureBlobUploader:
|
|
|
149
176
|
sas_url = self.generate_sas_url(normalized_blob_name)
|
|
150
177
|
if not _validate_sas_url(sas_url):
|
|
151
178
|
raise Exception("Generated SAS URL failed validation.")
|
|
179
|
+
if sha256_hex:
|
|
180
|
+
try:
|
|
181
|
+
self._sha256_to_blob[sha256_hex] = normalized_blob_name
|
|
182
|
+
except Exception:
|
|
183
|
+
pass
|
|
152
184
|
return {"blob_name": blob_name, "download_url": sas_url}
|
|
153
185
|
|
|
154
186
|
# Keep a single function for external calls to use the singleton uploader
|
|
@@ -110,6 +110,32 @@ except Exception:
|
|
|
110
110
|
_AZURE_SDK_AVAILABLE = False
|
|
111
111
|
|
|
112
112
|
|
|
113
|
+
def _normalize_content_to_parts(content: Any) -> List[Dict[str, Any]]:
|
|
114
|
+
"""Convert arbitrary content to Azure Foundry content parts.
|
|
115
|
+
|
|
116
|
+
Rules:
|
|
117
|
+
- If already an array of typed objects ({type:..., ...}), keep as-is.
|
|
118
|
+
- If it's a dict with 'type', wrap as a single part.
|
|
119
|
+
- Otherwise stringify to a single text part.
|
|
120
|
+
"""
|
|
121
|
+
try:
|
|
122
|
+
# Already properly typed parts
|
|
123
|
+
if isinstance(content, list) and all(isinstance(p, dict) and isinstance(p.get("type"), str) for p in content):
|
|
124
|
+
return content
|
|
125
|
+
# Single typed object
|
|
126
|
+
if isinstance(content, dict) and isinstance(content.get("type"), str):
|
|
127
|
+
return [content]
|
|
128
|
+
# Anything else -> stringify
|
|
129
|
+
import json as _json
|
|
130
|
+
if isinstance(content, (dict, list)):
|
|
131
|
+
text = _json.dumps(content)
|
|
132
|
+
else:
|
|
133
|
+
text = str(content) if content is not None else ""
|
|
134
|
+
return [{"type": "text", "text": text}]
|
|
135
|
+
except Exception:
|
|
136
|
+
return [{"type": "text", "text": str(content) if content is not None else ""}]
|
|
137
|
+
|
|
138
|
+
|
|
113
139
|
def _convert_to_azure_foundry_messages(
|
|
114
140
|
context: Optional[str],
|
|
115
141
|
examples: Optional[List[Dict[str, Any]]],
|
|
@@ -118,26 +144,26 @@ def _convert_to_azure_foundry_messages(
|
|
|
118
144
|
azure_messages: List[Dict[str, Any]] = []
|
|
119
145
|
|
|
120
146
|
if context:
|
|
121
|
-
azure_messages.append({"role": "system", "content": context})
|
|
147
|
+
azure_messages.append({"role": "system", "content": _normalize_content_to_parts(context)})
|
|
122
148
|
|
|
123
149
|
if examples:
|
|
124
150
|
for example in examples:
|
|
125
151
|
try:
|
|
126
152
|
inp = example.get("input", {})
|
|
127
153
|
out = example.get("output", {})
|
|
128
|
-
azure_messages.append({"role": inp.get("author", "user"), "content": inp.get("content")})
|
|
129
|
-
azure_messages.append({"role": out.get("author", "assistant"), "content": out.get("content")})
|
|
154
|
+
azure_messages.append({"role": inp.get("author", "user"), "content": _normalize_content_to_parts(inp.get("content"))})
|
|
155
|
+
azure_messages.append({"role": out.get("author", "assistant"), "content": _normalize_content_to_parts(out.get("content"))})
|
|
130
156
|
except Exception:
|
|
131
157
|
# ignore malformed example
|
|
132
158
|
continue
|
|
133
159
|
|
|
134
160
|
for message in messages or []:
|
|
135
|
-
|
|
136
|
-
#
|
|
137
|
-
if
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
161
|
+
role = message.get("role") or message.get("author")
|
|
162
|
+
# Normalize any unexpected roles to 'user' to satisfy Foundry role schema
|
|
163
|
+
if role not in ("user", "assistant", "system"):
|
|
164
|
+
role = "user"
|
|
165
|
+
content = message.get("content")
|
|
166
|
+
azure_messages.append({"role": role, "content": _normalize_content_to_parts(content)})
|
|
141
167
|
|
|
142
168
|
return azure_messages
|
|
143
169
|
|
|
@@ -285,8 +311,9 @@ def call_azure_foundry_agent(
|
|
|
285
311
|
On failure returns {"status":"error","error": "..."}
|
|
286
312
|
"""
|
|
287
313
|
try:
|
|
288
|
-
# Prefer using the Azure SDK path
|
|
289
|
-
|
|
314
|
+
# Prefer using the Azure SDK path only when explicitly enabled via env var
|
|
315
|
+
# Set AZURE_FOUNDRY_USE_SDK=true to enable. Default is to use HTTP path.
|
|
316
|
+
if _AZURE_SDK_AVAILABLE and str(os.getenv("AZURE_FOUNDRY_USE_SDK", "false")).lower() == "true":
|
|
290
317
|
try:
|
|
291
318
|
# Build credential: prefer explicit service principal creds in env var, else DefaultAzureCredential
|
|
292
319
|
cred = None
|
|
@@ -455,8 +482,17 @@ def call_azure_foundry_agent(
|
|
|
455
482
|
"stream": bool(parameters.get("stream") if parameters else False),
|
|
456
483
|
}
|
|
457
484
|
|
|
458
|
-
#
|
|
485
|
+
# Sanitize and merge allowed parameter keys into body
|
|
459
486
|
if parameters:
|
|
487
|
+
# Make a shallow copy so we can normalize values safely
|
|
488
|
+
sanitized_params = dict(parameters)
|
|
489
|
+
# Coerce unsupported response_format values to 'auto'
|
|
490
|
+
try:
|
|
491
|
+
rf = sanitized_params.get("response_format")
|
|
492
|
+
if isinstance(rf, str) and rf.lower() != "auto":
|
|
493
|
+
sanitized_params["response_format"] = "auto"
|
|
494
|
+
except Exception:
|
|
495
|
+
pass
|
|
460
496
|
allowed_keys = [
|
|
461
497
|
"tools",
|
|
462
498
|
"tool_resources",
|
|
@@ -472,8 +508,8 @@ def call_azure_foundry_agent(
|
|
|
472
508
|
"truncation_strategy",
|
|
473
509
|
]
|
|
474
510
|
for k in allowed_keys:
|
|
475
|
-
if k in
|
|
476
|
-
body[k] =
|
|
511
|
+
if k in sanitized_params:
|
|
512
|
+
body[k] = sanitized_params[k]
|
|
477
513
|
|
|
478
514
|
url = project_url.rstrip("/") + "/threads/runs"
|
|
479
515
|
headers = {"Content-Type": "application/json"}
|
|
@@ -209,41 +209,74 @@ async def download_image(url: str, filename: str, work_dir: Optional[str] = None
|
|
|
209
209
|
"Chrome/125.0.0.0 Safari/537.36"
|
|
210
210
|
)
|
|
211
211
|
session = requests.Session()
|
|
212
|
-
session.headers.update({
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
212
|
+
session.headers.update({
|
|
213
|
+
"User-Agent": BROWSER_UA,
|
|
214
|
+
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
|
215
|
+
"Accept-Language": "en-US,en;q=0.9",
|
|
216
|
+
"Referer": "https://duckduckgo.com/",
|
|
217
|
+
"Cache-Control": "no-cache",
|
|
218
|
+
})
|
|
219
|
+
|
|
220
|
+
# Attempt to derive an original Wikimedia URL if this is a thumbnail
|
|
221
|
+
wm_orig = None
|
|
222
|
+
try:
|
|
223
|
+
if "upload.wikimedia.org" in url and "/thumb/" in url:
|
|
224
|
+
parts = url.split("/thumb/")
|
|
225
|
+
if len(parts) == 2:
|
|
226
|
+
tail = parts[1]
|
|
227
|
+
segs = tail.split("/")
|
|
228
|
+
if len(segs) >= 3:
|
|
229
|
+
wm_orig = parts[0] + "/" + segs[0] + "/" + segs[1] + "/" + segs[2]
|
|
230
|
+
except Exception:
|
|
231
|
+
wm_orig = None
|
|
232
|
+
|
|
233
|
+
candidates = []
|
|
234
|
+
if wm_orig:
|
|
235
|
+
candidates.append(wm_orig)
|
|
236
|
+
candidates.append(url)
|
|
237
|
+
|
|
238
|
+
last_err = None
|
|
239
|
+
for candidate in candidates:
|
|
240
|
+
try:
|
|
241
|
+
with session.get(candidate, stream=True, timeout=25, allow_redirects=True) as response:
|
|
242
|
+
response.raise_for_status()
|
|
243
|
+
|
|
244
|
+
content_type = (response.headers.get("Content-Type") or "").lower()
|
|
245
|
+
|
|
246
|
+
# Peek first few bytes to validate image magic if header is missing/misleading
|
|
247
|
+
first_chunk = next(response.iter_content(chunk_size=4096), b"")
|
|
248
|
+
|
|
249
|
+
def looks_like_image(buf: bytes) -> bool:
    """Return True when *buf* begins with a known raster-image signature.

    Recognises PNG, JPEG, GIF (87a/89a) and WebP. Used as a fallback when the
    Content-Type header is missing or misleading.

    Args:
        buf: The first bytes of the response body.

    Returns:
        True if the leading bytes match one of the supported image formats.
    """
    if not buf or len(buf) < 4:
        return False
    prefix_sigs = (
        b"\x89PNG\r\n\x1a\n",  # PNG
        b"\xff\xd8\xff",       # JPEG
        b"GIF87a", b"GIF89a",  # GIF
    )
    if buf.startswith(prefix_sigs):
        return True
    # WebP is a RIFF container whose form type (bytes 8-11) is "WEBP".
    # Checking only for "RIFF" would also accept WAV, AVI and other RIFF files.
    return buf[:4] == b"RIFF" and buf[8:12] == b"WEBP"
|
|
259
|
+
|
|
260
|
+
if not (content_type.startswith("image/") or looks_like_image(first_chunk)):
|
|
261
|
+
last_err = f"Non-image content-type: {content_type} for {candidate}"
|
|
262
|
+
continue
|
|
263
|
+
|
|
264
|
+
# Write first chunk then stream the rest
|
|
265
|
+
with open(file_path, 'wb') as f:
|
|
266
|
+
if first_chunk:
|
|
267
|
+
f.write(first_chunk)
|
|
268
|
+
for chunk in response.iter_content(chunk_size=8192):
|
|
269
|
+
if chunk:
|
|
270
|
+
f.write(chunk)
|
|
271
|
+
|
|
272
|
+
logger.info(f"✅ Successfully downloaded image from {candidate} to {file_path}")
|
|
273
|
+
return json.dumps({"status": "success", "file_path": file_path})
|
|
274
|
+
except Exception as e:
|
|
275
|
+
last_err = str(e)
|
|
276
|
+
continue
|
|
277
|
+
|
|
278
|
+
logger.error(f"❌ Failed to download image after candidates. Last error: {last_err}")
|
|
279
|
+
return json.dumps({"status": "error", "message": last_err or "download_failed"})
|
|
247
280
|
except Exception as e:
|
|
248
281
|
logger.error(f"❌ Failed to download image from {url}: {e}")
|
|
249
282
|
return json.dumps({"status": "error", "message": str(e)})
|
|
@@ -513,33 +546,125 @@ def get_file_tools(executor_work_dir: Optional[str] = None) -> List[FunctionTool
|
|
|
513
546
|
tools = []
|
|
514
547
|
|
|
515
548
|
# Create partial functions with work_dir bound
|
|
516
|
-
def
|
|
517
|
-
return
|
|
549
|
+
def _to_text_part(value: str) -> Dict[str, Any]:
|
|
550
|
+
# Always return OpenAI-typed content
|
|
551
|
+
return {"type": "text", "text": value if isinstance(value, str) else str(value)}
|
|
552
|
+
|
|
553
|
+
def bound_list_files_typed() -> Dict[str, Any]:
    """Run list_files_in_work_dir for the enclosing executor_work_dir and return its result as a typed text part."""
    listing = asyncio.run(list_files_in_work_dir(executor_work_dir))
    return _to_text_part(listing)
|
|
518
555
|
|
|
519
|
-
def
|
|
520
|
-
return asyncio.run(read_file_from_work_dir(filename, executor_work_dir, max_length))
|
|
556
|
+
def bound_read_file_typed(filename: str, max_length: int = 5000) -> Dict[str, Any]:
    """Run read_file_from_work_dir for *filename* in the enclosing executor_work_dir and return its result as a typed text part."""
    file_text = asyncio.run(
        read_file_from_work_dir(filename, executor_work_dir, max_length)
    )
    return _to_text_part(file_text)
|
|
521
558
|
|
|
522
|
-
def
|
|
523
|
-
return asyncio.run(get_file_info(filename, executor_work_dir))
|
|
559
|
+
def bound_get_file_info_typed(filename: str) -> Dict[str, Any]:
    """Run get_file_info for *filename* in the enclosing executor_work_dir and return its result as a typed text part."""
    info = asyncio.run(get_file_info(filename, executor_work_dir))
    return _to_text_part(info)
|
|
524
561
|
|
|
525
562
|
# Add tools
|
|
526
563
|
tools.append(FunctionTool(
|
|
527
|
-
|
|
564
|
+
bound_list_files_typed,
|
|
528
565
|
name="list_files_in_work_dir",
|
|
529
566
|
description="Intelligently discover and categorize all files in the working directory with comprehensive metadata"
|
|
530
567
|
))
|
|
531
568
|
|
|
532
569
|
tools.append(FunctionTool(
|
|
533
|
-
|
|
570
|
+
bound_read_file_typed,
|
|
534
571
|
name="read_file_from_work_dir",
|
|
535
572
|
description="Intelligently read and analyze any file type with automatic content detection and preview generation"
|
|
536
573
|
))
|
|
537
574
|
|
|
538
575
|
tools.append(FunctionTool(
|
|
539
|
-
|
|
576
|
+
bound_get_file_info_typed,
|
|
540
577
|
name="get_file_info",
|
|
541
578
|
description="Get comprehensive metadata and analysis for any file type including permissions and recommendations"
|
|
542
579
|
))
|
|
580
|
+
|
|
581
|
+
# Add a convenience uploader for the newest deliverables
|
|
582
|
+
async def _upload_recent_deliverables(max_age_minutes: int = 15, max_files: int = 5) -> str:
    """Upload the most recently modified deliverables from the working directory.

    Scans the top level of ``executor_work_dir`` (or the current directory when
    that is falsy) for regular files with a deliverable extension that were
    modified within the last *max_age_minutes*, then uploads up to *max_files*
    of them, newest first, via ``upload_file_to_azure_blob``.

    Args:
        max_age_minutes: Maximum file age, in minutes, to be considered.
        max_files: Maximum number of files to upload; values below 1 upload nothing.

    Returns:
        JSON string ``{"uploads": [...]}`` where each item is the parsed upload
        result or ``{"error": ..., "file": ...}`` for a failed upload; on an
        unexpected failure, ``{"error": ...}``.
    """
    try:
        from .azure_blob_tools import upload_file_to_azure_blob
        import time

        work_dir = executor_work_dir or os.getcwd()
        now = time.time()
        max_age_s = max_age_minutes * 60
        deliverable_exts = {".pptx", ".ppt", ".csv", ".png", ".jpg", ".jpeg", ".pdf", ".zip"}

        # Keep (mtime, path) pairs so sorting reuses the value read during the scan;
        # re-stat'ing in the sort key would raise (and abort everything) if a file
        # disappeared between the scan and the sort.
        recent = []
        if os.path.isdir(work_dir):
            for name in os.listdir(work_dir):
                if os.path.splitext(name)[1].lower() not in deliverable_exts:
                    continue
                path = os.path.join(work_dir, name)
                if not os.path.isfile(path):
                    continue
                try:
                    mtime = os.path.getmtime(path)
                except OSError:
                    # File vanished or is unreadable: skip it, keep scanning.
                    continue
                if now - mtime <= max_age_s:
                    recent.append((mtime, path))

        recent.sort(key=lambda item: item[0], reverse=True)

        uploads = []
        # Clamp so a negative max_files cannot slice from the wrong end.
        for _mtime, path in recent[:max(max_files, 0)]:
            try:
                uploads.append(json.loads(upload_file_to_azure_blob(path)))
            except Exception as e:
                uploads.append({"error": str(e), "file": path})
        return json.dumps({"uploads": uploads})
    except Exception as e:
        return json.dumps({"error": str(e)})
|
|
611
|
+
|
|
612
|
+
def bound_upload_recent_deliverables_typed(max_age_minutes: int = 15, max_files: int = 5) -> Dict[str, Any]:
    """Run _upload_recent_deliverables synchronously and return its JSON result as a typed text part."""
    upload_report = asyncio.run(
        _upload_recent_deliverables(max_age_minutes, max_files)
    )
    return _to_text_part(upload_report)
|
|
614
|
+
|
|
615
|
+
tools.append(FunctionTool(
|
|
616
|
+
bound_upload_recent_deliverables_typed,
|
|
617
|
+
name="upload_recent_deliverables",
|
|
618
|
+
description="Upload the newest deliverables from the working directory (scans last N minutes) and return their URLs"
|
|
619
|
+
))
|
|
620
|
+
|
|
621
|
+
# A suggestion-only tool: list likely deliverables without uploading
|
|
622
|
+
async def _list_recent_deliverables(max_age_minutes: int = 15, max_files: int = 10, min_size_bytes: int = 1024) -> str:
    """Suggest likely deliverables in the working directory without uploading them.

    Looks at the top level of ``executor_work_dir`` (or the current directory
    when that is falsy) and keeps regular files with a deliverable extension
    that are at least *min_size_bytes* large and at most *max_age_minutes* old.

    Returns:
        JSON string ``{"suggestions": [...]}`` holding at most *max_files*
        entries ordered by size (largest first), then by recency; on an
        unexpected failure, ``{"error": ...}``.
    """
    try:
        import time

        root = executor_work_dir or os.getcwd()
        scan_time = time.time()
        wanted_exts = {".pptx", ".ppt", ".csv", ".png", ".jpg", ".jpeg", ".pdf", ".zip"}

        def _describe(entry):
            # Returns the suggestion dict for one directory entry, or None when it is filtered out.
            full_path = os.path.join(root, entry)
            if not os.path.isfile(full_path):
                return None
            suffix = os.path.splitext(entry)[1].lower()
            if suffix not in wanted_exts:
                return None
            try:
                n_bytes = os.path.getsize(full_path)
                if n_bytes < min_size_bytes:
                    return None
                age = scan_time - os.path.getmtime(full_path)
                if age > max_age_minutes * 60:
                    return None
                return {
                    "filename": entry,
                    "absolute_path": full_path,
                    "size_bytes": n_bytes,
                    "age_seconds": int(age),
                    "extension": suffix,
                }
            except Exception:
                # Any per-file failure just drops that file from the suggestions.
                return None

        entries = os.listdir(root) if os.path.isdir(root) else []
        found = [item for item in map(_describe, entries) if item is not None]

        # Sort by size desc then recency
        ranked = sorted(
            found,
            key=lambda item: (item["size_bytes"], -item["age_seconds"]),
            reverse=True,
        )
        return json.dumps({"suggestions": ranked[:max_files]})
    except Exception as e:
        return json.dumps({"error": str(e)})
|
|
659
|
+
|
|
660
|
+
def bound_list_recent_deliverables_typed(max_age_minutes: int = 15, max_files: int = 10, min_size_bytes: int = 1024) -> Dict[str, Any]:
    """Run _list_recent_deliverables synchronously and return its JSON result as a typed text part."""
    suggestion_report = asyncio.run(
        _list_recent_deliverables(max_age_minutes, max_files, min_size_bytes)
    )
    return _to_text_part(suggestion_report)
|
|
662
|
+
|
|
663
|
+
tools.append(FunctionTool(
|
|
664
|
+
bound_list_recent_deliverables_typed,
|
|
665
|
+
name="list_recent_deliverables",
|
|
666
|
+
description="List likely deliverables (by type, size, recency) without uploading; returns suggestions for human-like selection"
|
|
667
|
+
))
|
|
543
668
|
|
|
544
669
|
logger.info(f"✅ Universal file tools created for work_dir: {executor_work_dir}")
|
|
545
670
|
return tools
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Google Custom Search (CSE) tool.
|
|
3
|
+
|
|
4
|
+
Provides `google_cse_search` async function that agents can call as a FunctionTool.
|
|
5
|
+
It reads API credentials from environment variables:
|
|
6
|
+
- GOOGLE_CSE_KEY
|
|
7
|
+
- GOOGLE_CSE_CX
|
|
8
|
+
|
|
9
|
+
Parameters mirror the CSE REST API where practical. Returns a JSON string.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import os
|
|
13
|
+
import json
|
|
14
|
+
import requests
|
|
15
|
+
from typing import Any, Dict, Optional
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _get_env_or_error() -> Dict[str, str]:
|
|
19
|
+
api_key = os.getenv("GOOGLE_CSE_KEY")
|
|
20
|
+
cx_env = os.getenv("GOOGLE_CSE_CX")
|
|
21
|
+
if not api_key:
|
|
22
|
+
raise RuntimeError("GOOGLE_CSE_KEY is not set in the environment variables!")
|
|
23
|
+
if not cx_env:
|
|
24
|
+
raise RuntimeError("GOOGLE_CSE_CX is not set in the environment variables!")
|
|
25
|
+
return {"key": api_key, "cx": cx_env}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _build_params(text: Optional[str], parameters: Optional[Dict[str, Any]], env_cx: str) -> Dict[str, Any]:
|
|
29
|
+
parameters = parameters or {}
|
|
30
|
+
# Required
|
|
31
|
+
q = (parameters.get("q") or text or "")
|
|
32
|
+
cx = parameters.get("cx") or env_cx
|
|
33
|
+
|
|
34
|
+
params: Dict[str, Any] = {
|
|
35
|
+
"q": q,
|
|
36
|
+
"cx": cx,
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
# Optional passthroughs
|
|
40
|
+
if "num" in parameters and parameters["num"] is not None:
|
|
41
|
+
params["num"] = parameters["num"]
|
|
42
|
+
if "start" in parameters and parameters["start"] is not None:
|
|
43
|
+
params["start"] = parameters["start"]
|
|
44
|
+
if parameters.get("safe"):
|
|
45
|
+
params["safe"] = parameters["safe"]
|
|
46
|
+
if parameters.get("dateRestrict"):
|
|
47
|
+
params["dateRestrict"] = parameters["dateRestrict"]
|
|
48
|
+
if parameters.get("siteSearch"):
|
|
49
|
+
params["siteSearch"] = parameters["siteSearch"]
|
|
50
|
+
if parameters.get("siteSearchFilter"):
|
|
51
|
+
params["siteSearchFilter"] = parameters["siteSearchFilter"]
|
|
52
|
+
if parameters.get("searchType"):
|
|
53
|
+
params["searchType"] = parameters["searchType"]
|
|
54
|
+
# Image-specific filters
|
|
55
|
+
if parameters.get("imgSize"):
|
|
56
|
+
params["imgSize"] = parameters["imgSize"]
|
|
57
|
+
if parameters.get("imgType"):
|
|
58
|
+
params["imgType"] = parameters["imgType"]
|
|
59
|
+
if parameters.get("imgColorType"):
|
|
60
|
+
params["imgColorType"] = parameters["imgColorType"]
|
|
61
|
+
if parameters.get("imgDominantColor"):
|
|
62
|
+
params["imgDominantColor"] = parameters["imgDominantColor"]
|
|
63
|
+
if parameters.get("imgAspectRatio"):
|
|
64
|
+
params["imgAspectRatio"] = parameters["imgAspectRatio"]
|
|
65
|
+
if parameters.get("rights"):
|
|
66
|
+
params["rights"] = parameters["rights"]
|
|
67
|
+
if parameters.get("gl"):
|
|
68
|
+
params["gl"] = parameters["gl"]
|
|
69
|
+
if parameters.get("hl"):
|
|
70
|
+
params["hl"] = parameters["hl"]
|
|
71
|
+
if parameters.get("lr"):
|
|
72
|
+
params["lr"] = parameters["lr"]
|
|
73
|
+
if parameters.get("sort"):
|
|
74
|
+
params["sort"] = parameters["sort"]
|
|
75
|
+
if parameters.get("exactTerms"):
|
|
76
|
+
params["exactTerms"] = parameters["exactTerms"]
|
|
77
|
+
if parameters.get("excludeTerms"):
|
|
78
|
+
params["excludeTerms"] = parameters["excludeTerms"]
|
|
79
|
+
if parameters.get("orTerms"):
|
|
80
|
+
params["orTerms"] = parameters["orTerms"]
|
|
81
|
+
if parameters.get("fileType"):
|
|
82
|
+
params["fileType"] = parameters["fileType"]
|
|
83
|
+
|
|
84
|
+
return params
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
async def google_cse_search(
    text: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
) -> str:
    """
    Perform a Google Custom Search.

    The HTTP request is made with the blocking ``requests`` library, so it is
    dispatched to the default executor to avoid stalling the event loop.

    Args:
        text: query text (used if `parameters.q` not provided)
        parameters: optional extra parameters per CSE API (e.g., num, start, safe, dateRestrict, etc.)

    Returns:
        JSON string of the raw CSE API response, or ``{"error": "..."}`` on
        failure. The API key is redacted from any error text.
    """
    import asyncio
    import functools
    from urllib.parse import quote_plus

    api_key = ""
    try:
        creds = _get_env_or_error()
        api_key = creds["key"]
        cx = creds["cx"]

        params = _build_params(text, parameters, cx)
        params["key"] = api_key

        url = "https://www.googleapis.com/customsearch/v1"
        loop = asyncio.get_running_loop()
        resp = await loop.run_in_executor(
            None,
            functools.partial(requests.get, url, params=params, timeout=20),
        )
        resp.raise_for_status()
        data = resp.json()
        return json.dumps(data)
    except Exception as exc:
        message = str(exc)
        if api_key:
            # requests error messages embed the request URL, which carries
            # "key=<api key>" in its query string; never hand that back to the caller.
            for secret in {api_key, quote_plus(api_key)}:
                message = message.replace(secret, "[REDACTED]")
        return json.dumps({"error": f"google_cse_search failed: {message}"})
|
|
116
|
+
|
|
117
|
+
|