sicily 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- Auth/gmail_auth.py +71 -0
- Auth/swiggy_auth.py +54 -0
- Auth/tavily_auth.py +10 -0
- Auth/telegram_auth.py +12 -0
- Context/preferences.md +0 -0
- Recurring_Tasks/recurring_tasks.py +239 -0
- Recurring_Tasks/recurring_tasks.yaml +32 -0
- Souls/eval_llm.md +35 -0
- Souls/main_llm.md +35 -0
- Souls/safety_llm.md +22 -0
- Tests/conftest.py +57 -0
- Tests/test_connectors.py +34 -0
- Tests/test_memory_and_context.py +78 -0
- Tests/test_security.py +43 -0
- Tests/test_settings.py +20 -0
- Tests/test_souls.py +26 -0
- Tests/test_telegram_commands.py +31 -0
- Tests/test_tool_manager.py +35 -0
- agent.py +704 -0
- cli.py +55 -0
- configuration.py +198 -0
- connectors.py +93 -0
- main.py +625 -0
- memory_and_context.py +288 -0
- session_store.py +151 -0
- settings.example.json +7 -0
- sicily-0.2.3.dist-info/METADATA +150 -0
- sicily-0.2.3.dist-info/RECORD +32 -0
- sicily-0.2.3.dist-info/WHEEL +4 -0
- sicily-0.2.3.dist-info/entry_points.txt +2 -0
- telegram_commands.py +200 -0
- tool_manager.py +226 -0
Auth/gmail_auth.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import socket
|
|
3
|
+
from google.oauth2.credentials import Credentials
|
|
4
|
+
from google_auth_oauthlib.flow import InstalledAppFlow
|
|
5
|
+
from google.auth.transport.requests import Request
|
|
6
|
+
|
|
7
|
+
SCOPES = [
|
|
8
|
+
"https://www.googleapis.com/auth/gmail.modify",
|
|
9
|
+
"https://www.googleapis.com/auth/gmail.send",
|
|
10
|
+
"https://www.googleapis.com/auth/gmail.readonly",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
CREDENTIALS_FILE = "google_credentials.json"
|
|
14
|
+
PORT = 8080
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
async def get_gmail_token() -> str:
    """Return a Gmail OAuth access token, refreshing or re-authorising as needed.

    Credentials are cached in ``gmail_token.json`` in the current working
    directory. If the cached credentials are expired and carry a refresh
    token they are refreshed; if there are no usable credentials the
    interactive installed-app flow is run on ``PORT``.

    Returns:
        The access token string of the resulting credentials.

    Raises:
        FileNotFoundError: if the interactive flow is needed but
            ``CREDENTIALS_FILE`` does not exist.

    NOTE(review): ``creds.refresh`` and ``flow.run_local_server`` are blocking
    calls made from a coroutine; they stall the event loop while they run.
    """
    token_file = "gmail_token.json"
    creds = None
    changed = False  # True once we hold credentials that differ from the file

    if os.path.exists(token_file):
        creds = Credentials.from_authorized_user_file(token_file, SCOPES)

    if creds and creds.expired and creds.refresh_token:
        creds.refresh(Request())
        changed = True

    elif not creds or not creds.valid:
        if not os.path.exists(CREDENTIALS_FILE):
            raise FileNotFoundError(f"❌ {CREDENTIALS_FILE} not found!")

        # --- Free the port if it's stuck from a previous cancelled flow ---
        if _is_port_in_use(PORT):
            print(f"⚠️ Port {PORT} already in use — freeing it...")
            _free_port(PORT)

        flow = InstalledAppFlow.from_client_secrets_file(CREDENTIALS_FILE, SCOPES)
        creds = flow.run_local_server(
            port=PORT,
            prompt='consent',
        )
        changed = True

    # Persist whenever the credentials were refreshed or newly obtained, so a
    # refreshed token is never lost; skip the write when nothing changed.
    if changed:
        with open(token_file, "w", encoding="utf-8") as f:
            f.write(creds.to_json())

    return creds.token
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _is_port_in_use(port: int) -> bool:
|
|
51
|
+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
|
52
|
+
return s.connect_ex(("localhost", port)) == 0
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _free_port(port: int):
    """Best-effort: kill whatever process is holding TCP ``port``.

    On Windows the owning PIDs are taken from ``netstat`` and passed to
    ``taskkill``; elsewhere ``fuser -k <port>/tcp`` is used. Failures of the
    external command are not raised. The function then sleeps briefly so the
    OS can release the port.

    NOTE(review): this terminates *any* process on the port, not only a
    stale auth server started by this program.
    """
    import subprocess
    import sys
    import time

    port = int(port)  # guarantees only digits are interpolated below

    if sys.platform == "win32":
        # `for /f` is a cmd.exe built-in, so this branch needs a shell.
        subprocess.run(
            f"for /f \"tokens=5\" %a in ('netstat -aon ^| find \":{port}\"') do taskkill /F /PID %a",
            shell=True, capture_output=True,
        )
    else:
        # Linux / macOS: an argument list avoids going through a shell.
        # NOTE(review): `fuser -k <port>/tcp` is the Linux (psmisc) syntax —
        # confirm it behaves as intended on macOS.
        try:
            subprocess.run(["fuser", "-k", f"{port}/tcp"], capture_output=True)
        except FileNotFoundError:
            # Previously the shell hid this case; report it instead.
            print(f"⚠️ 'fuser' not found — could not free port {port}")

    # Give the OS a moment to release the port
    time.sleep(0.5)
|
Auth/swiggy_auth.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import hashlib, secrets, base64, time, webbrowser
|
|
3
|
+
import httpx
|
|
4
|
+
from urllib.parse import urlencode
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
CLIENT_ID = os.getenv("SWIGGY_CLIENT_ID")
|
|
8
|
+
REDIRECT_URI = "http://localhost:8000/callback"
|
|
9
|
+
_oauth_code_future = None
|
|
10
|
+
|
|
11
|
+
_cache = {"token": None, "expires_at": 0}
|
|
12
|
+
|
|
13
|
+
async def get_valid_token() -> str:
    """Return a Swiggy access token, running the OAuth PKCE flow if needed.

    A token cached in ``_cache`` is returned while it has more than 60
    seconds of lifetime left. Otherwise the browser is opened on the
    authorize endpoint, the coroutine waits on ``_oauth_code_future`` for
    the authorization code, and the code is exchanged for a token.

    Returns:
        The access token string.

    Raises:
        RuntimeError: if ``SWIGGY_CLIENT_ID`` is not configured.
        httpx.HTTPStatusError: if the token endpoint answers with an error
            status.
        KeyError: if the token response has no ``access_token``.

    NOTE(review): the ``state`` value is sent but not checked in this
    function; presumably whoever resolves ``_oauth_code_future`` validates
    it — confirm against the callback handler.
    """
    global _oauth_code_future

    if _cache["token"] and time.time() < _cache["expires_at"] - 60:
        return _cache["token"]

    if not CLIENT_ID:
        # Without a client id the authorize URL would carry "client_id=None"
        # and the wait below would most likely never complete.
        raise RuntimeError("SWIGGY_CLIENT_ID is not set")

    # PKCE
    verifier = secrets.token_urlsafe(32)
    digest = hashlib.sha256(verifier.encode()).digest()
    challenge = base64.urlsafe_b64encode(digest).rstrip(b"=").decode()

    # Open browser for phone + OTP
    params = urlencode({
        "response_type": "code",
        "client_id": CLIENT_ID,
        "redirect_uri": REDIRECT_URI,
        "code_challenge": challenge,
        "code_challenge_method": "S256",
        "state": secrets.token_urlsafe(16),
        "scope": "mcp:tools",
    })
    webbrowser.open(f"https://mcp.swiggy.com/auth/authorize?{params}")

    # The future is resolved from outside this function with the code.
    loop = asyncio.get_running_loop()
    _oauth_code_future = loop.create_future()
    print("⏳ Waiting for OAuth redirect on http://localhost:8000/callback ...", flush=True)
    code = await _oauth_code_future

    async with httpx.AsyncClient() as client:
        resp = await client.post("https://mcp.swiggy.com/auth/token", json={
            "grant_type": "authorization_code",
            "code": code,
            "code_verifier": verifier,
            "client_id": CLIENT_ID,
            "redirect_uri": REDIRECT_URI,
        })

    # Surface HTTP errors directly rather than as a confusing KeyError below.
    resp.raise_for_status()
    payload = resp.json()
    token = payload["access_token"]

    # Prefer the lifetime reported by the server; fall back to 5 days.
    try:
        lifetime = float(payload.get("expires_in", 432000))
    except (TypeError, ValueError):
        lifetime = 432000.0

    _cache["token"] = token
    _cache["expires_at"] = time.time() + lifetime
    return token
|
Auth/tavily_auth.py
ADDED
Auth/telegram_auth.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
async def get_telegram_config() -> dict:
    """Return a copy of the process environment for the Telegram connector.

    The three Telegram variables are read explicitly so that a missing one
    raises ``KeyError`` instead of being silently absent from the result.
    """
    required = (
        "TELEGRAM_API_ID",
        "TELEGRAM_API_HASH",
        "TELEGRAM_SESSION_STRING",
    )
    # Look the required values up first: a missing one must fail before
    # anything is returned.
    telegram_values = {name: os.environ[name] for name in required}

    config = dict(os.environ)
    config.update(telegram_values)
    return config
|
Context/preferences.md
ADDED
|
File without changes
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import re
|
|
3
|
+
import yaml
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from datetime import datetime, timedelta
|
|
7
|
+
|
|
8
|
+
YAML_FILE = Path.cwd() / "Recurring_Tasks" / "recurring_tasks.yaml"
|
|
9
|
+
|
|
10
|
+
_dispatch = None
|
|
11
|
+
|
|
12
|
+
def set_dispatch(fn):
    """Register ``fn`` as the module-wide dispatch callable.

    ``execute_task`` awaits it as ``fn(task_id, task_text)`` each time a
    task fires.
    """
    # Rebind the module-level name, not a local one.
    global _dispatch
    _dispatch = fn
|
|
15
|
+
|
|
16
|
+
# Day abbreviations accepted in a schedule's `days` list.
VALID_DAYS = {"mon", "tue", "wed", "thu", "fri", "sat", "sun"}

# Maps datetime.weekday() (0 is Monday) to its abbreviation.
WEEKDAY_MAP = dict(enumerate(("mon", "tue", "wed", "thu", "fri", "sat", "sun")))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# PUBLIC ENTRYPOINT
|
|
32
|
+
# Strong references to the running schedulers. The event loop keeps only
# weak references to tasks, so a task nobody references can be
# garbage-collected before it finishes.
_background_tasks = set()


async def start_recurring_tasks():
    """Load the YAML task list and start one runner task per enabled entry.

    Must be called from a running event loop. Returns as soon as the
    runners are scheduled; it does not wait for them.

    Raises:
        ValueError: propagated from ``load_and_validate_yaml`` when the
            configuration is invalid.
    """
    tasks = load_and_validate_yaml()

    enabled_tasks = [t for t in tasks if t["enabled"]]

    print(f"🔁 Starting {len(enabled_tasks)} recurring task(s)...")

    for task_config in enabled_tasks:
        runner = asyncio.create_task(task_runner(task_config))
        # Hold the task until it completes, then drop the reference.
        _background_tasks.add(runner)
        runner.add_done_callback(_background_tasks.discard)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# YAML LOADING + VALIDATION
|
|
45
|
+
def load_and_validate_yaml():
    """Read ``YAML_FILE`` and return its validated list of task mappings.

    Each task must be a mapping with ``id``, ``enabled``, ``task`` and
    ``schedule``; ids must be unique; each schedule is checked by
    ``validate_schedule``.

    Returns:
        The ``tasks`` list exactly as loaded from the file.

    Raises:
        ValueError: on any structural problem in the document.
        OSError: if the file cannot be opened.
    """
    # Explicit encoding: task text may contain non-ASCII characters and the
    # platform default encoding is not always UTF-8.
    with open(YAML_FILE, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f)

    # A list or scalar at the top level is reported as a config error
    # rather than failing later with TypeError.
    if not isinstance(data, dict) or "tasks" not in data:
        raise ValueError("Missing 'tasks' in YAML.")

    tasks = data["tasks"]

    if not isinstance(tasks, list):
        raise ValueError("'tasks' must be a list.")

    required_fields = ["id", "enabled", "task", "schedule"]
    seen_ids = set()

    for task in tasks:
        # On a string entry, `field not in task` would be a substring test.
        if not isinstance(task, dict):
            raise ValueError(f"Each task must be a mapping, got: {task!r}")

        for field in required_fields:
            if field not in task:
                raise ValueError(f"Task missing required field: {field}")

        task_id = task["id"]

        if task_id in seen_ids:
            raise ValueError(f"Duplicate task id: {task_id}")

        seen_ids.add(task_id)

        validate_schedule(task_id, task["schedule"])

    return tasks
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def validate_schedule(task_id, schedule):
    """Validate one task's ``schedule`` mapping.

    Rules enforced:
      * ``mode`` is required and must be ``daily`` or ``interval``.
      * ``daily`` requires ``at`` (checked by ``validate_time``).
      * ``interval`` requires ``every`` (checked by ``validate_interval``)
        and may carry ``start`` (checked by ``validate_time``).
      * ``days``, when present, must be a list of ``VALID_DAYS`` entries.

    Args:
        task_id: used only to prefix error messages.
        schedule: the mapping to check.

    Raises:
        ValueError: on the first rule violated.
    """
    # e.g. `schedule:` left empty in YAML loads as None.
    if not isinstance(schedule, dict):
        raise ValueError(f"[{task_id}] schedule must be a mapping")

    if "mode" not in schedule:
        raise ValueError(f"[{task_id}] Missing schedule.mode")

    mode = schedule["mode"]

    if mode not in ["daily", "interval"]:
        raise ValueError(f"[{task_id}] Invalid mode: {mode}")

    # ── DAILY ────────────────────────────────────────────────
    if mode == "daily":

        if "at" not in schedule:
            raise ValueError(f"[{task_id}] daily mode requires 'at'")

        validate_time(schedule["at"], task_id)

    # ── INTERVAL ─────────────────────────────────────────────
    elif mode == "interval":

        if "every" not in schedule:
            raise ValueError(f"[{task_id}] interval mode requires 'every'")

        validate_interval(schedule["every"], task_id)

        if "start" in schedule:
            validate_time(schedule["start"], task_id)

    # ── DAYS ────────────────────────────────────────────────
    if "days" in schedule:

        if not isinstance(schedule["days"], list):
            raise ValueError(f"[{task_id}] days must be a list")

        for d in schedule["days"]:
            if d not in VALID_DAYS:
                raise ValueError(f"[{task_id}] Invalid day: {d}")
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def validate_time(value, task_id):
    """Check that ``value`` is a 24-hour ``HH:MM`` string.

    Args:
        value: the candidate time, e.g. ``"08:00"``.
        task_id: used only to prefix error messages.

    Raises:
        ValueError: if ``value`` is not a string, is not two digits, a
            colon and two digits, or is outside 00:00–23:59.
    """
    # An unquoted YAML scalar such as 21:00 can be loaded as an integer;
    # report that as a config error instead of a TypeError from `re`.
    if not isinstance(value, str):
        raise ValueError(
            f"[{task_id}] Invalid time format: {value!r} (expected a quoted string like \"08:00\")"
        )

    if not re.fullmatch(r"\d{2}:\d{2}", value):
        raise ValueError(f"[{task_id}] Invalid time format: {value}")

    hour, minute = map(int, value.split(":"))

    if not (0 <= hour <= 23 and 0 <= minute <= 59):
        raise ValueError(f"[{task_id}] Invalid time: {value}")
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def validate_interval(value, task_id):
    """Check that ``value`` is a positive interval such as ``30m`` or ``2h``.

    Args:
        value: digits followed by ``m`` (minutes) or ``h`` (hours).
        task_id: used only to prefix error messages.

    Raises:
        ValueError: if ``value`` is not a string, does not match the
            format, or denotes a zero-length interval.
    """
    if not isinstance(value, str) or not re.fullmatch(r"\d+[mh]", value):
        raise ValueError(f"[{task_id}] Invalid interval: {value}")

    # A zero interval would make the runner loop without ever pausing.
    if int(value[:-1]) == 0:
        raise ValueError(f"[{task_id}] Interval must be greater than zero: {value}")
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# TASK RUNNER
|
|
138
|
+
async def _run_task_safely(task_id, task_text):
    """Run one task execution and report, rather than propagate, its failure."""
    try:
        await execute_task(task_id, task_text)
    except Exception as exc:
        # Cancellation is not an Exception subclass and still propagates.
        print(f"⚠️ Task {task_id} failed: {exc!r}", flush=True)


async def task_runner(task_config):
    """Run one recurring task forever according to its schedule.

    ``daily`` mode sleeps until the next occurrence of ``schedule["at"]``
    and then runs the task if today is an allowed day. ``interval`` mode
    optionally waits for today's ``schedule["start"]``, runs the task if
    today is an allowed day, then sleeps for ``schedule["every"]``.

    Args:
        task_config: a task mapping with ``id``, ``task`` and ``schedule``.

    Raises:
        ValueError: if the schedule mode is neither ``daily`` nor
            ``interval``.
    """
    task_id = task_config["id"]
    task_text = task_config["task"]
    schedule = task_config["schedule"]

    mode = schedule["mode"]

    # An unknown mode would otherwise spin in the loop below without ever
    # yielding to the event loop.
    if mode not in ("daily", "interval"):
        raise ValueError(f"[{task_id}] Invalid mode: {mode}")

    # The interval never changes, so parse it once.
    interval_seconds = parse_interval(schedule["every"]) if mode == "interval" else None

    print(f"✅ Task loaded: {task_id}")

    while True:

        now = datetime.now()

        # DAILY MODE
        if mode == "daily":

            run_time = build_today_datetime(schedule["at"])

            # Today's slot has already passed: aim for tomorrow's.
            if run_time <= now:
                run_time += timedelta(days=1)

            wait_seconds = (run_time - now).total_seconds()

            await asyncio.sleep(wait_seconds)

            # The day filter is evaluated after waking, i.e. at run time.
            if should_run_today(schedule):
                await _run_task_safely(task_id, task_text)

        # INTERVAL MODE
        else:

            if "start" in schedule:

                first_run = build_today_datetime(schedule["start"])

                # Before today's start time: hold off until then.
                if first_run > now:
                    wait_seconds = (first_run - now).total_seconds()
                    await asyncio.sleep(wait_seconds)

            if should_run_today(schedule):
                await _run_task_safely(task_id, task_text)

            await asyncio.sleep(interval_seconds)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
# HELPERS
|
|
187
|
+
async def execute_task(task_id, task_text):
    """Announce a task execution on stdout and forward it to the dispatcher.

    If no dispatch callable has been registered, a warning is printed and
    nothing else happens.
    """
    banner_lines = [
        "",
        "🔁 TASK EXECUTED",
        f"🆔 ID : {task_id}",
        f"📝 Task : {task_text}",
        f"🕒 Time : {datetime.now()}",
        "",
    ]
    # The empty first and last items yield the leading and trailing newline.
    print("\n".join(banner_lines), flush=True)

    handler = _dispatch
    if handler is None:
        print(f"⚠️ No dispatch set — skipping task {task_id}")
        return

    await handler(task_id, task_text)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def build_today_datetime(time_str):
    """Return today's local date at the ``HH:MM`` given by ``time_str``.

    Seconds and microseconds are zeroed. The result is a naive datetime
    derived from ``datetime.now()``.
    """
    hh, mm = (int(part) for part in time_str.split(":"))
    today = datetime.now()
    return today.replace(hour=hh, minute=mm, second=0, microsecond=0)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def parse_interval(value):
    """Convert an interval such as ``"30m"`` or ``"2h"`` into seconds.

    Raises:
        ValueError: if the numeric part is not an integer or the unit
            suffix is neither ``m`` nor ``h``.
    """
    count = int(value[:-1])
    suffix = value[-1]

    seconds_per_unit = {"m": 60, "h": 60 * 60}
    if suffix not in seconds_per_unit:
        raise ValueError(f"Invalid interval unit: {value}")

    return count * seconds_per_unit[suffix]
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def should_run_today(schedule):
    """Tell whether the schedule allows a run on the current weekday.

    A schedule without a ``days`` key runs every day; otherwise today's
    abbreviation from ``WEEKDAY_MAP`` must be listed in ``days``.
    """
    if "days" in schedule:
        weekday_name = WEEKDAY_MAP[datetime.now().weekday()]
        return weekday_name in schedule["days"]

    return True
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
tasks:
|
|
2
|
+
|
|
3
|
+
- id: morning_news
|
|
4
|
+
enabled: false
|
|
5
|
+
task: "Summarize the top AI news headlines"
|
|
6
|
+
schedule:
|
|
7
|
+
mode: daily
|
|
8
|
+
at: "08:00"
|
|
9
|
+
days: [mon, tue, wed, thu, fri]
|
|
10
|
+
|
|
11
|
+
- id: email_check
|
|
12
|
+
enabled: false
|
|
13
|
+
task: "Check unread emails and notify if anything important arrives"
|
|
14
|
+
schedule:
|
|
15
|
+
mode: interval
|
|
16
|
+
every: 30m
|
|
17
|
+
days: [mon, tue, wed]
|
|
18
|
+
|
|
19
|
+
- id: server_health
|
|
20
|
+
enabled: false
|
|
21
|
+
task: "Monitor server CPU and memory usage"
|
|
22
|
+
schedule:
|
|
23
|
+
mode: interval
|
|
24
|
+
every: 2h
|
|
25
|
+
start: "09:00"
|
|
26
|
+
|
|
27
|
+
- id: disabled_task
|
|
28
|
+
enabled: false
|
|
29
|
+
task: "Generate a weekly expense summary"
|
|
30
|
+
schedule:
|
|
31
|
+
mode: daily
|
|
32
|
+
at: "21:00"
|
Souls/eval_llm.md
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# User Preference & Behavior Extractor
|
|
2
|
+
|
|
3
|
+
You are a user preference extractor.
|
|
4
|
+
|
|
5
|
+
Analyze the full agent session and extract:
|
|
6
|
+
|
|
7
|
+
## User Preferences
|
|
8
|
+
|
|
9
|
+
- preferred products, services, brands, or categories
|
|
10
|
+
- recurring interests or usage patterns
|
|
11
|
+
- stated likes, dislikes, and priorities
|
|
12
|
+
- dietary, accessibility, budgetary, or other explicit constraints
|
|
13
|
+
|
|
14
|
+
## Decision-Making & Risk Behavior
|
|
15
|
+
|
|
16
|
+
- risk tolerance, especially around irreversible actions
|
|
17
|
+
- how the user responds to confirmations or approvals
|
|
18
|
+
- sensitivity to side effects, costs, or external actions
|
|
19
|
+
|
|
20
|
+
## Interaction Patterns
|
|
21
|
+
|
|
22
|
+
- notable communication habits
|
|
23
|
+
- preferred instruction style or level of detail
|
|
24
|
+
- patterns in how the user approves, rejects, or modifies tool calls
|
|
25
|
+
|
|
26
|
+
## Output Requirements
|
|
27
|
+
|
|
28
|
+
- extract only information supported by the session
|
|
29
|
+
- do not invent preferences or assumptions
|
|
30
|
+
- keep the output concise
|
|
31
|
+
- return clean Markdown
|
|
32
|
+
- avoid repeating semantically identical information
|
|
33
|
+
- prefer stable long-term behavioural patterns over temporary context
|
|
34
|
+
- do not store temporary objectives or one-time requests
|
|
35
|
+
- avoid storing information unlikely to matter in future sessions
|
Souls/main_llm.md
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# AI Assistant
|
|
2
|
+
You are a capable, action-oriented AI assistant with access to tools
|
|
3
|
+
from a variety of connected external services (productivity,
|
|
4
|
+
communication, e-commerce, logistics, finance, health, smart home,
|
|
5
|
+
and more).
|
|
6
|
+
|
|
7
|
+
## Core Behaviour
|
|
8
|
+
- Understand the user's request and act on it directly.
|
|
9
|
+
- Use the right tool for the right service based on the user's intent.
|
|
10
|
+
- Never fabricate information that a tool can provide — fetch it.
|
|
11
|
+
- Never ask the user "should I search?" or "shall I do that?" — just do it.
|
|
12
|
+
|
|
13
|
+
## Tool Usage
|
|
14
|
+
Call tools immediately and directly whenever they can fulfill the request.
|
|
15
|
+
Do not narrate what you are about to do — just do it.
|
|
16
|
+
|
|
17
|
+
## Search & Retry
|
|
18
|
+
- If a search returns no results, try 1–2 semantically close alternative queries before telling the user nothing was found.
|
|
19
|
+
- Do not ask permission to retry.
|
|
20
|
+
- Do not repeat results the user can already see.
|
|
21
|
+
|
|
22
|
+
## Multi-Service Awareness
|
|
23
|
+
- Multiple services may be connected simultaneously.
|
|
24
|
+
- Pick the correct service's tools based on the user's stated intent.
|
|
25
|
+
- If the user names a specific service, prioritise its tools.
|
|
26
|
+
- If it is genuinely unclear which service applies, ask one short clarifying question.
|
|
27
|
+
|
|
28
|
+
## Error Handling
|
|
29
|
+
- If a tool fails, explain the issue in one sentence and continue.
|
|
30
|
+
- Do not apologise repeatedly or go into technical detail.
|
|
31
|
+
|
|
32
|
+
## Response Style
|
|
33
|
+
- Be concise and action-oriented.
|
|
34
|
+
- Confirm completed actions briefly (e.g. "Done — item added.").
|
|
35
|
+
- Do not list options or ask clarifying questions for things you can infer from context.
|
Souls/safety_llm.md
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Tool Call Safety Classifier
|
|
2
|
+
You are a safety classifier for AI agent tool calls.
|
|
3
|
+
|
|
4
|
+
## SAFE — auto-execute without user confirmation
|
|
5
|
+
A tool call is **SAFE** if it is:
|
|
6
|
+
- Read-only (fetching, searching, listing, viewing data)
|
|
7
|
+
- Has no side effects on external systems
|
|
8
|
+
- Fully reversible or produces no lasting change
|
|
9
|
+
|
|
10
|
+
## UNSAFE — requires user confirmation before execution
|
|
11
|
+
A tool call is **UNSAFE** if it:
|
|
12
|
+
- Sends messages, emails, or notifications
|
|
13
|
+
- Places orders, initiates payments, or moves money
|
|
14
|
+
- Creates, modifies, or deletes data
|
|
15
|
+
- Triggers automations or scheduled actions
|
|
16
|
+
- Changes settings or state in any external system
|
|
17
|
+
|
|
18
|
+
## Decision Rule
|
|
19
|
+
When in doubt, classify as UNSAFE.
|
|
20
|
+
|
|
21
|
+
## Output
|
|
22
|
+
Return only structured output — no explanation.
|
Tests/conftest.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import ast
|
|
2
|
+
import sys
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
# Paths — adjust here if your folder layout differs
|
|
6
|
+
PROJECT_ROOT = Path(__file__).resolve().parent.parent
|
|
7
|
+
sys.path.insert(0, str(PROJECT_ROOT))
|
|
8
|
+
|
|
9
|
+
CONNECTORS_PATH = PROJECT_ROOT / "connectors.py"
|
|
10
|
+
TELEGRAM_COMMANDS_PATH = PROJECT_ROOT / "telegram_commands.py"
|
|
11
|
+
SETTINGS_EXAMPLE_PATH = PROJECT_ROOT / "settings.example.json"
|
|
12
|
+
AGENT_PATH = PROJECT_ROOT / "agent.py"
|
|
13
|
+
SOULS_DIR = PROJECT_ROOT / "Souls"
|
|
14
|
+
|
|
15
|
+
# AST helpers — read source as text, no project imports required
|
|
16
|
+
def parse(path: Path) -> ast.Module:
    """Read ``path`` as UTF-8 text and return its parsed AST module."""
    source = path.read_text(encoding="utf-8")
    return ast.parse(source)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_function_node(tree: ast.Module, func_name: str):
    """Return the first sync or async function named ``func_name``, or None.

    "First" is in ``ast.walk`` order; nested functions and methods are
    searched too.
    """
    function_types = (ast.FunctionDef, ast.AsyncFunctionDef)
    matches = (
        candidate
        for candidate in ast.walk(tree)
        if isinstance(candidate, function_types) and candidate.name == func_name
    )
    return next(matches, None)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def all_function_names(tree: ast.Module) -> set[str]:
    """Collect the names of every sync or async function in ``tree``.

    Nested functions and methods are included.
    """
    function_types = (ast.FunctionDef, ast.AsyncFunctionDef)
    names = set()
    for candidate in ast.walk(tree):
        if isinstance(candidate, function_types):
            names.add(candidate.name)
    return names
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_dict_literal(tree: ast.Module, var_name: str):
    """Return the dict literal assigned to ``var_name``, or None.

    Only plain assignments (``NAME = {...}``) whose value is a dict display
    are considered; the first match in ``ast.walk`` order wins.
    """
    for stmt in ast.walk(tree):
        if not isinstance(stmt, ast.Assign) or not isinstance(stmt.value, ast.Dict):
            continue
        if any(isinstance(t, ast.Name) and t.id == var_name for t in stmt.targets):
            return stmt.value
    return None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def dict_str_keys(dict_node: ast.Dict) -> list[str]:
    """List the string-literal keys of a dict display, in source order.

    Keys that are not string constants are skipped.
    """
    string_keys = []
    for key in dict_node.keys:
        if isinstance(key, ast.Constant) and isinstance(key.value, str):
            string_keys.append(key.value)
    return string_keys
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def extract_call_first_arg_strings(scope_node, call_name: str) -> list[str]:
    """Collect string-literal first arguments of calls to ``call_name``.

    Every call under ``scope_node`` is inspected. A call matches when its
    callee is a bare name or an attribute whose final name equals
    ``call_name``; it contributes only if its first positional argument is
    a string literal.
    """
    def callee_name(func):
        # `foo(...)` gives "foo"; `obj.foo(...)` gives "foo"; else None.
        if isinstance(func, ast.Name):
            return func.id
        if isinstance(func, ast.Attribute):
            return func.attr
        return None

    found = []
    for call in (n for n in ast.walk(scope_node) if isinstance(n, ast.Call)):
        if callee_name(call.func) != call_name or not call.args:
            continue
        head = call.args[0]
        if isinstance(head, ast.Constant) and isinstance(head.value, str):
            found.append(head.value)
    return found
|
Tests/test_connectors.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from Tests.conftest import (
|
|
2
|
+
parse,
|
|
3
|
+
get_dict_literal,
|
|
4
|
+
dict_str_keys,
|
|
5
|
+
all_function_names,
|
|
6
|
+
CONNECTORS_PATH,
|
|
7
|
+
TELEGRAM_COMMANDS_PATH,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# connectors.py <-> telegram_commands.py consistency
|
|
12
|
+
def test_each_connector_has_connect_and_disconnect_commands():
    """
    Every key of CONNECTORS (connectors.py) needs a matching
    connect_<key>_command and disconnect_<key>_command function in
    telegram_commands.py.
    """
    connectors_dict = get_dict_literal(parse(CONNECTORS_PATH), "CONNECTORS")
    assert connectors_dict is not None, "Could not find a `CONNECTORS = {...}` dict in connectors.py"

    connector_keys = dict_str_keys(connectors_dict)
    assert connector_keys, "CONNECTORS dict has no string keys — check it's defined as expected"

    defined_functions = all_function_names(parse(TELEGRAM_COMMANDS_PATH))

    # Expected handler names, connect before disconnect for each key.
    expected = [
        fn_name
        for key in connector_keys
        for fn_name in (f"connect_{key}_command", f"disconnect_{key}_command")
    ]
    missing = [fn_name for fn_name in expected if fn_name not in defined_functions]

    assert not missing, f"telegram_commands.py is missing: {missing}"
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# memory_and_context.py — pure-logic checks
|
|
5
|
+
# (functions copied verbatim from memory_and_context.py — keep them in sync with that module)
|
|
6
|
+
def _parse_preference_lines(text: str) -> list[str]:
|
|
7
|
+
lines = []
|
|
8
|
+
for raw in text.splitlines():
|
|
9
|
+
line = raw.strip()
|
|
10
|
+
|
|
11
|
+
if not line:
|
|
12
|
+
continue
|
|
13
|
+
if line.startswith("#"):
|
|
14
|
+
continue
|
|
15
|
+
if set(line) <= set("-_* "):
|
|
16
|
+
continue
|
|
17
|
+
|
|
18
|
+
for marker in ("-", "*", "•", "–"):
|
|
19
|
+
if line.startswith(marker):
|
|
20
|
+
line = line[len(marker):].strip()
|
|
21
|
+
break
|
|
22
|
+
|
|
23
|
+
if line:
|
|
24
|
+
lines.append(line)
|
|
25
|
+
|
|
26
|
+
return lines
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# --- copied verbatim from memory_and_context.py ---
|
|
30
|
+
def _cosine_similarity(query_vec: np.ndarray, matrix: np.ndarray) -> np.ndarray:
|
|
31
|
+
q = query_vec / (np.linalg.norm(query_vec) + 1e-9)
|
|
32
|
+
norms = np.linalg.norm(matrix, axis=1, keepdims=True) + 1e-9
|
|
33
|
+
return (matrix / norms) @ q
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_parse_preference_lines_strips_markers_and_skips_noise():
    """Headings, rules and blank lines are dropped; each bullet marker is stripped.

    One fixture line per supported marker: hyphen, star, round bullet and
    en dash.
    """
    # The en-dash line must start with a real en dash (U+2013); with an
    # ASCII hyphen the en-dash marker is never exercised.
    text = """
    # Header — should be skipped
    Plain preference line
    - Hyphen bullet
    * Star bullet
    • Round bullet
    – En-dash bullet
    ---

    Another plain line
    """.strip()

    result = _parse_preference_lines(text)

    assert result == [
        "Plain preference line",
        "Hyphen bullet",
        "Star bullet",
        "Round bullet",
        "En-dash bullet",
        "Another plain line",
    ]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def test_parse_preference_lines_empty_input_returns_empty_list():
    """Empty and whitespace-only input both yield no preference lines."""
    for blank in ("", " \n\n "):
        assert _parse_preference_lines(blank) == []
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_cosine_similarity_known_values():
    """Identical, orthogonal and 45-degree rows score about 1, 0 and 0.7071."""
    query = np.array([1.0, 0.0])
    rows_and_expected = [
        ([1.0, 0.0], 1.0),             # identical direction
        ([0.0, 1.0], 0.0),             # orthogonal
        ([1.0, 1.0], 1 / np.sqrt(2)),  # 45 degrees
    ]
    matrix = np.array([row for row, _ in rows_and_expected])

    scores = _cosine_similarity(query, matrix)

    for index, (_, expected) in enumerate(rows_and_expected):
        assert np.isclose(scores[index], expected, atol=1e-6)
|