astraagent 2.25.6 → 2.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -67,7 +67,7 @@ class LocalServerProvider(LLMProvider):
 
         headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
         url = f"{self.api_base}/chat/completions"
-        timeout = aiohttp.ClientTimeout(total=30)
+        timeout = aiohttp.ClientTimeout(total=120)  # Extended timeout for complex tasks
 
         async with aiohttp.ClientSession(timeout=timeout) as session:
             async with session.post(url, json=body, headers=headers) as response:
@@ -83,10 +83,29 @@ class LocalServerProvider(LLMProvider):
 
                 message = choices[0].get("message", {})
                 content = message.get("content", "")
+                tool_calls = message.get("tool_calls")
+
+                # Convert OpenAI tool calls to our format
+                formatted_tool_calls = []
+                if tool_calls:
+                    for tc in tool_calls:
+                        if tc.get("type") == "function":
+                            f = tc["function"]
+                            try:
+                                args = json.loads(f["arguments"])
+                                formatted_tool_calls.append({
+                                    "id": tc.get("id"),
+                                    "name": f["name"],
+                                    "arguments": args
+                                })
+                            except:
+                                continue
+
                 usage = data.get("usage", {})
 
                 return LLMResponse(
-                    content=content,
+                    content=content or "",
+                    tool_calls=formatted_tool_calls if formatted_tool_calls else None,
                     finish_reason=choices[0].get("finish_reason", "stop"),
                     tokens_used=usage.get("total_tokens", 0),
                     model=data.get("model", self.model),
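
The tool-call normalization added here is repeated verbatim in the OpenAIProvider, OpenRouterProvider, and GroqProvider hunks below. As a rough standalone sketch (the sample payload and variable names are ours, not the package's; only the shapes come from the diff), the conversion JSON-decodes each call's string `arguments` into a dict:

```python
import json

# Example OpenAI-style message, as returned under choices[0]["message"].
message = {
    "content": None,
    "tool_calls": [{
        "id": "call_abc123",
        "type": "function",
        "function": {"name": "open_url", "arguments": '{"url": "https://example.com"}'},
    }],
}

formatted = []
for tc in message.get("tool_calls") or []:
    if tc.get("type") == "function":
        try:
            formatted.append({
                "id": tc.get("id"),
                "name": tc["function"]["name"],
                # OpenAI sends arguments as a JSON string; the diff decodes it to a dict.
                "arguments": json.loads(tc["function"]["arguments"]),
            })
        except (KeyError, json.JSONDecodeError):
            continue  # malformed calls are skipped, as the bare except in the diff does

print(formatted)
# [{'id': 'call_abc123', 'name': 'open_url', 'arguments': {'url': 'https://example.com'}}]
```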
@@ -132,6 +151,8 @@ class OpenAIProvider(LLMProvider):
             "temperature": temperature,
             "max_tokens": max_tokens,
         }
+        if tools:
+            body["tools"] = [{"type": "function", "function": t} for t in tools]
 
         headers = {
             "Authorization": f"Bearer {self.api_key}",
@@ -139,7 +160,7 @@ class OpenAIProvider(LLMProvider):
         }
 
         url = "https://api.openai.com/v1/chat/completions"
-        timeout = aiohttp.ClientTimeout(total=30)
+        timeout = aiohttp.ClientTimeout(total=120)  # Extended timeout for complex tasks
 
         async with aiohttp.ClientSession(timeout=timeout) as session:
             async with session.post(url, json=body, headers=headers) as response:
@@ -155,10 +176,29 @@ class OpenAIProvider(LLMProvider):
 
                 message = choices[0].get("message", {})
                 content = message.get("content", "")
+                tool_calls = message.get("tool_calls")
+
+                # Convert OpenAI tool calls to our format
+                formatted_tool_calls = []
+                if tool_calls:
+                    for tc in tool_calls:
+                        if tc.get("type") == "function":
+                            f = tc["function"]
+                            try:
+                                args = json.loads(f["arguments"])
+                                formatted_tool_calls.append({
+                                    "id": tc.get("id"),
+                                    "name": f["name"],
+                                    "arguments": args
+                                })
+                            except:
+                                continue
+
                 usage = data.get("usage", {})
 
                 return LLMResponse(
-                    content=content,
+                    content=content or "",
+                    tool_calls=formatted_tool_calls if formatted_tool_calls else None,
                     finish_reason=choices[0].get("finish_reason", "stop"),
                     tokens_used=usage.get("total_tokens", 0),
                     model=self.model,
@@ -200,7 +240,23 @@ class GeminiProvider(LLMProvider):
             if msg.role == "user":
                 contents.append({"role": "user", "parts": [{"text": msg.content}]})
             elif msg.role == "assistant":
-                contents.append({"role": "model", "parts": [{"text": msg.content}]})
+                parts = []
+                if msg.content:
+                    parts.append({"text": msg.content})
+                if msg.tool_calls:
+                    for tc in msg.tool_calls:
+                        parts.append({"functionCall": {"name": tc["name"], "args": tc["arguments"]}})
+                contents.append({"role": "model", "parts": parts})
+            elif msg.role == "tool":
+                contents.append({
+                    "role": "function",
+                    "parts": [{
+                        "functionResponse": {
+                            "name": msg.name,
+                            "response": {"result": msg.content}
+                        }
+                    }]
+                })
 
         body = {
             "contents": contents,
@@ -209,9 +265,11 @@ class GeminiProvider(LLMProvider):
                 "maxOutputTokens": max_tokens,
             }
         }
+        if tools:
+            body["tools"] = [{"function_declarations": tools}]
 
         url = f"https://generativelanguage.googleapis.com/v1beta/models/{self.model}:generateContent?key={self.api_key}"
-        timeout = aiohttp.ClientTimeout(total=30)
+        timeout = aiohttp.ClientTimeout(total=300)  # Extended timeout for complex tasks
 
         headers = {"Content-Type": "application/json"}
 
@@ -219,6 +277,8 @@ class GeminiProvider(LLMProvider):
             async with session.post(url, json=body, headers=headers) as response:
                 if response.status != 200:
                     error_text = await response.text()
+                    if response.status == 429:
+                        raise RuntimeError("🛑 Gemini Quota Exceeded (429). You've hit the rate limit or monthly quota. Please check https://ai.google.dev/pricing")
                     raise RuntimeError(f"Gemini error {response.status}: {error_text[:200]}")
 
                 data = await response.json()
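
The provider surfaces a 429 as a plain `RuntimeError`, so any retry policy has to live in the caller. A hypothetical sketch (the `provider.chat(...)` call shape is assumed, not shown in this diff):

```python
import asyncio

# Hypothetical caller-side backoff; the diff itself only raises on 429.
async def chat_with_backoff(provider, messages, retries=3):
    for attempt in range(retries):
        try:
            return await provider.chat(messages)
        except RuntimeError as e:
            if "429" not in str(e) or attempt == retries - 1:
                raise
            await asyncio.sleep(2 ** attempt)  # back off 1s, then 2s
```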
@@ -228,12 +288,24 @@ class GeminiProvider(LLMProvider):
                     return LLMResponse(content="No response from Gemini")
 
                 content_parts = candidates[0].get("content", {}).get("parts", [])
-                content = content_parts[0].get("text", "") if content_parts else ""
+                content = ""
+                tool_calls = []
+
+                for part in content_parts:
+                    if "text" in part:
+                        content += part["text"]
+                    if "functionCall" in part:
+                        fc = part["functionCall"]
+                        tool_calls.append({
+                            "name": fc["name"],
+                            "arguments": fc.get("args", {})
+                        })
 
                 usage = data.get("usageMetadata", {})
 
                 return LLMResponse(
-                    content=content,
+                    content=content or "",
+                    tool_calls=tool_calls if tool_calls else None,
                     finish_reason=candidates[0].get("finishReason", "STOP"),
                     tokens_used=usage.get("totalInputTokens", 0) + usage.get("totalOutputTokens", 0),
                     model=self.model,
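
Worked through by hand, the parsing loop above turns a mixed Gemini candidate into accumulated text plus tool calls; note that, unlike the OpenAI-style providers, these entries carry no `id`. The sample data below is illustrative:

```python
# Sample Gemini candidate parts and the values the loop above would produce.
content_parts = [
    {"text": "Opening the site now."},
    {"functionCall": {"name": "open_url", "args": {"url": "https://example.com"}}},
]

content, tool_calls = "", []
for part in content_parts:
    if "text" in part:
        content += part["text"]
    if "functionCall" in part:
        fc = part["functionCall"]
        tool_calls.append({"name": fc["name"], "arguments": fc.get("args", {})})

assert content == "Opening the site now."
assert tool_calls == [{"name": "open_url", "arguments": {"url": "https://example.com"}}]
```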
@@ -241,7 +313,7 @@ class GeminiProvider(LLMProvider):
                 )
 
         except asyncio.TimeoutError:
-            raise RuntimeError("⏱️ Gemini API timeout")
+            raise RuntimeError("⏱️ Gemini API timeout - Increased due to complexity. Try simplifying request.")
         except Exception as e:
             raise RuntimeError(f"Gemini error: {str(e)}")
 
@@ -285,7 +357,7 @@ class AnthropicProvider(LLMProvider):
         }
 
         url = "https://api.anthropic.com/v1/messages"
-        timeout = aiohttp.ClientTimeout(total=30)
+        timeout = aiohttp.ClientTimeout(total=120)  # Extended timeout for complex tasks
 
         async with aiohttp.ClientSession(timeout=timeout) as session:
             async with session.post(url, json=body, headers=headers) as response:
@@ -349,6 +421,8 @@ class OpenRouterProvider(LLMProvider):
             "temperature": temperature,
             "max_tokens": max_tokens,
         }
+        if tools:
+            body["tools"] = [{"type": "function", "function": t} for t in tools]
 
         headers = {
             "Authorization": f"Bearer {self.api_key}",
@@ -357,7 +431,7 @@ class OpenRouterProvider(LLMProvider):
         }
 
         url = "https://openrouter.ai/api/v1/chat/completions"
-        timeout = aiohttp.ClientTimeout(total=30)
+        timeout = aiohttp.ClientTimeout(total=120)  # Extended timeout for complex tasks
 
         async with aiohttp.ClientSession(timeout=timeout) as session:
             async with session.post(url, json=body, headers=headers) as response:
@@ -373,10 +447,29 @@ class OpenRouterProvider(LLMProvider):
 
                 message = choices[0].get("message", {})
                 content = message.get("content", "")
+                tool_calls = message.get("tool_calls")
+
+                # Convert OpenAI tool calls to our format
+                formatted_tool_calls = []
+                if tool_calls:
+                    for tc in tool_calls:
+                        if tc.get("type") == "function":
+                            f = tc["function"]
+                            try:
+                                args = json.loads(f["arguments"])
+                                formatted_tool_calls.append({
+                                    "id": tc.get("id"),
+                                    "name": f["name"],
+                                    "arguments": args
+                                })
+                            except:
+                                continue
+
                 usage = data.get("usage", {})
 
                 return LLMResponse(
-                    content=content,
+                    content=content or "",
+                    tool_calls=formatted_tool_calls if formatted_tool_calls else None,
                     finish_reason=choices[0].get("finish_reason", "stop"),
                     tokens_used=usage.get("total_tokens", 0),
                     model=self.model,
@@ -420,6 +513,8 @@ class GroqProvider(LLMProvider):
             "temperature": temperature,
             "max_tokens": max_tokens,
         }
+        if tools:
+            body["tools"] = [{"type": "function", "function": t} for t in tools]
 
         headers = {
             "Authorization": f"Bearer {self.api_key}",
@@ -427,7 +522,7 @@ class GroqProvider(LLMProvider):
         }
 
         url = "https://api.groq.com/openai/v1/chat/completions"
-        timeout = aiohttp.ClientTimeout(total=30)
+        timeout = aiohttp.ClientTimeout(total=120)  # Extended timeout for complex tasks
 
         async with aiohttp.ClientSession(timeout=timeout) as session:
             async with session.post(url, json=body, headers=headers) as response:
@@ -443,10 +538,29 @@ class GroqProvider(LLMProvider):
 
                 message = choices[0].get("message", {})
                 content = message.get("content", "")
+                tool_calls = message.get("tool_calls")
+
+                # Convert OpenAI tool calls to our format
+                formatted_tool_calls = []
+                if tool_calls:
+                    for tc in tool_calls:
+                        if tc.get("type") == "function":
+                            f = tc["function"]
+                            try:
+                                args = json.loads(f["arguments"])
+                                formatted_tool_calls.append({
+                                    "id": tc.get("id"),
+                                    "name": f["name"],
+                                    "arguments": args
+                                })
+                            except:
+                                continue
+
                 usage = data.get("usage", {})
 
                 return LLMResponse(
-                    content=content,
+                    content=content or "",
+                    tool_calls=formatted_tool_calls if formatted_tool_calls else None,
                     finish_reason=choices[0].get("finish_reason", "stop"),
                     tokens_used=usage.get("total_tokens", 0),
                     model=self.model,
@@ -506,8 +620,8 @@ PROVIDERS = {
 }
 
 
-def create_provider(provider_name: str = "local", model: str = None,
-                    api_key: str = None, api_base: str = None) -> LLMProvider:
+def create_provider(provider_name: str = "local", model: str = None,
+                    api_key: str = None, api_base: str = None) -> LLMProvider:
     """Factory function to create LLM provider."""
     provider_name = provider_name.lower().strip()
 
@@ -517,10 +631,10 @@ def create_provider(provider_name: str = "local", model: str = None,
     provider_info = PROVIDERS[provider_name]
     provider_class = provider_info["class"]
 
-    # Local provider accepts custom api_base; other providers ignore it.
-    if provider_name == "local":
-        return provider_class(api_key=api_key, model=model, api_base=api_base)
-    return provider_class(api_key=api_key, model=model)
+    # Local provider accepts custom api_base; other providers ignore it.
+    if provider_name == "local":
+        return provider_class(api_key=api_key, model=model, api_base=api_base)
+    return provider_class(api_key=api_key, model=model)
 
 
 def get_provider_info(provider_name: str = None) -> Dict[str, Any]:
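
Usage of the factory is unchanged by this release; a sketch based only on the signature shown in this hunk (the model name and base URL below are placeholder values):

```python
# Local provider is the only one that honors api_base, per the comment above.
provider = create_provider(
    provider_name="local",
    model="llama-3.1-8b-instruct",        # hypothetical model name
    api_base="http://localhost:8080/v1",  # hypothetical local server endpoint
)
```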
package/astra/prompts.py CHANGED
@@ -4,34 +4,38 @@ Sophisticated system prompts for world-class AI agent performance.
 """
 
 # Main system prompt - designed for maximum intelligence and capability
-SYSTEM_PROMPT = """You are AstraAgent, an autonomous system-level AI assistant.
-You can:
-- Execute code directly on the user's system
-- Open websites, apps, and control the desktop
-- Access the internet and browse websites
-- Read and write files
-- Run shell commands
-- Persist and retrieve memory across sessions
-- Control Windows: open apps, scroll, click, type, screenshots
-
-=== CORE IDENTITY ===
-- Name: AstraAgent v2.25.6
-- Role: Autonomous AI Agent with full system access
-- Capabilities: Windows automation, coding, research, file management, web browsing
-- Personality: Intelligent, efficient, proactive, and helpful
-
-=== CAPABILITY CONTRACT ===
-You MUST explicitly follow:
-1. System prompt instructions
-2. Memory context injected into prompt/messages
-3. Tool schemas and parameter names
-
-If memory context conflicts with user request:
-- Ask for clarification or proceed with the latest explicit user instruction.
-- Never ignore memory context silently.
-
-=== AVAILABLE TOOLS ===
-{tools}
+SYSTEM_PROMPT = """You are AstraAgent, an autonomous system-level AI assistant.
+You can:
+- Execute code directly on the user's system
+- Open websites, apps, and control the desktop
+- Access the internet and browse websites
+- Read and write files
+- Run shell commands
+- Persist and retrieve memory across sessions
+- Control Windows: open apps, scroll, click, type, screenshots
+
+=== CORE IDENTITY ===
+- Name: AstraAgent v2.26.0 (Master Engineer Edition)
+- Role: Autonomous System-Level Software Engineer & Global Assistant
+- Capabilities: Full System Access, Windows Automation, production-grade Software Development, Deep Research, Autonomous Problem Solving.
+- Knowledge: I am aware of all my tools and can use them to manipulate files, run local code, browse the live web, and control the user's desktop natively.
+- Operating Mode: I can work 24/7 autonomously once a goal is set.
+
+=== CAPABILITY CONTRACT ===
+1. I have DIRECT tool access to the user's computer. I don't just "suggest" code; I write and run it.
+2. I use internal memory to maintain state across complex multi-step tasks.
+3. If I fail, I analyze why and retry with a different strategy.
+You MUST explicitly follow:
+1. System prompt instructions
+2. Memory context injected into prompt/messages
+3. Tool schemas and parameter names
+
+If memory context conflicts with user request:
+- Ask for clarification or proceed with the latest explicit user instruction.
+- Never ignore memory context silently.
+
+=== AVAILABLE TOOLS ===
+{tools}
 
 === IMPORTANT TOOL EXAMPLES ===
 
@@ -84,19 +88,19 @@ To SPEAK out loud:
   "args": {{"text": "Hello, how can I help you?"}}
 }}
 
-To SEARCH the web:
-{{
-  "thought": "Searching for information",
-  "action": "web_search",
-  "args": {{"query": "Python tutorials"}}
-}}
-
-To SEARCH with multiple engines:
-{{
-  "thought": "Using multiple search providers for better coverage",
-  "action": "web_search_multi",
-  "args": {{"query": "Python tutorials", "provider": "auto", "num_results": 8}}
-}}
+To SEARCH the web:
+{{
+  "thought": "Searching for information",
+  "action": "web_search",
+  "args": {{"query": "Python tutorials"}}
+}}
+
+To SEARCH with multiple engines:
+{{
+  "thought": "Using multiple search providers for better coverage",
+  "action": "web_search_multi",
+  "args": {{"query": "Python tutorials", "provider": "auto", "num_results": 8}}
+}}
 
 === RESPONSE FORMAT ===
 ALWAYS respond with valid JSON:
@@ -114,23 +118,23 @@ For final answers (ONLY when task is complete):
   "final": "Your response to user"
 }}
 
-=== CRITICAL RULES ===
-1. ALWAYS use JSON format
-2. EXECUTE actions with tools - NEVER just describe how to do something
+=== CRITICAL RULES ===
+1. ALWAYS use JSON format
+2. EXECUTE actions with tools - NEVER just describe how to do something
 3. "open youtube" = use open_url tool
 4. "open notepad" = use open_app tool
 5. "scroll down" = use desktop_control tool
 6. "set volume 30%" = use system_control with action="set_volume", value="30"
 7. "lock computer" = use system_control with action="lock"
-8. "mute" = use system_control with action="mute"
-9. DO NOT give instructions - EXECUTE the action directly!
-10. You MUST use memory context when planning the next step
-11. If your output is not valid JSON, immediately self-correct and return valid JSON only
-
-=== WORKSPACE ===
-Working directory: {workspace}
-
-You are a powerful AI agent. USE YOUR TOOLS to help the user! Execute actions, don't describe them!"""
+8. "mute" = use system_control with action="mute"
+9. DO NOT give instructions - EXECUTE the action directly!
+10. You MUST use memory context when planning the next step
+11. If your output is not valid JSON, immediately self-correct and return valid JSON only
+
+=== WORKSPACE ===
+Working directory: {workspace}
+
+You are a powerful AI agent. USE YOUR TOOLS to help the user! Execute actions, don't describe them!"""
 
 
 # Enhanced prompt for complex tasks
@@ -217,6 +221,26 @@ You are in creative problem-solving mode:
 """
 
 
+
+# Engineer mode prompt
+ENGINEER_PROMPT = """
+=== 24/7 ENGINEER MODE ===
+
+You are a hired Autonomous Engineer. Your mission is to maintain, improve, and operate this system continuously.
+You have FULL ACCESS to the user's computer (files, shell, browser, apps).
+
+Protocol:
+1. **Proactive**: Don't just wait for orders. Identify improvements and fix issues.
+2. **Robust**: Handle errors gracefully. Retry with exponential backoff.
+3. **Budget Aware**: Be mindful of token usage. Consolidate steps where possible.
+4. **Full Context**: Always check memory and file system before asking questions.
+5. **Persistence**: If a task fails, try alternative methods until success.
+6. **Transparency**: Log every major action and decision.
+
+You are NOT a chatbot. You are an ENGINEER. ACT LIKE ONE.
+"""
+
+
 # Error recovery prompt
 ERROR_RECOVERY_PROMPT = """
 === ERROR RECOVERY ===
@@ -242,7 +266,8 @@ def build_system_prompt(workspace: str, tools: str, mode: str = "default") -> str:
         "research": RESEARCH_PROMPT,
         "code": CODE_PROMPT,
         "creative": CREATIVE_PROMPT,
-        "error": ERROR_RECOVERY_PROMPT
+        "error": ERROR_RECOVERY_PROMPT,
+        "engineer": ENGINEER_PROMPT
     }
 
     if mode in mode_prompts:
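
With this change `"engineer"` becomes a recognized mode. A call sketch, assuming only the signature in the hunk header (argument values are placeholders; how the mode prompt is combined with the base prompt is not shown in this diff):

```python
prompt = build_system_prompt(
    workspace="/home/user/project",          # hypothetical workspace path
    tools="open_url, open_app, web_search",  # hypothetical tool listing
    mode="engineer",                         # selects ENGINEER_PROMPT from mode_prompts
)
```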
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "astraagent",
-  "version": "2.25.6",
+  "version": "2.26.0",
   "description": "AstraAgent launcher package for npm",
   "keywords": [
     "astraagent",
@@ -48,4 +48,4 @@
   "engines": {
     "node": ">=18.0.0"
   }
-}
+}