aw-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
agentwatch/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .wrapper import WatchedOpenAI, wrap, analyze_text, INGEST_URL, AgentBudgetExceeded, AgentBudgetCheckUnavailable
2
+
3
+ __version__ = "0.1.0"
4
+ __all__ = ["WatchedOpenAI", "wrap", "analyze_text", "INGEST_URL", "AgentBudgetExceeded", "AgentBudgetCheckUnavailable"]
agentwatch/wrapper.py ADDED
@@ -0,0 +1,499 @@
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import math
5
+ import queue
6
+ import re
7
+ import logging
8
+ import threading
9
+ import time
10
+ import uuid
11
+ from typing import Any, Dict, List, Optional, Set
12
+
13
+ _log = logging.getLogger("agentwatch")
14
+
15
+ import httpx
16
+ from openai import OpenAI as _OpenAI
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # Risk detection — aligned with the TypeScript classifier (classifier.ts)
20
+ # ---------------------------------------------------------------------------
21
+
22
# Email-like token, matched case-insensitively: 1-64 local-part characters, "@",
# 1-253 domain characters, a dot, then a 2-63 letter suffix.
EMAIL_RE = re.compile(r"\b[A-Z0-9._%+-]{1,64}@[A-Z0-9.-]{1,253}\.[A-Z]{2,63}\b", re.IGNORECASE)
# Nine digits grouped 3-2-4 with an optional "-" or " " between groups. The
# lookaheads reject a first group of 000, 666 or 9xx, a middle group of 00 and
# a last group of 0000.
SSN_RE = re.compile(r"\b(?!000|666|9\d{2})\d{3}[- ]?(?!00)\d{2}[- ]?(?!0000)\d{4}\b")
# One of six four-letter prefixes followed by exactly 16 upper-case letters/digits.
AWS_ACCESS_KEY_RE = re.compile(r"\b(?:AKIA|ASIA|AIDA|AROA|AGPA|ANPA)[A-Z0-9]{16}\b")
# "sk_live_" or "sk_test_" followed by at least 16 alphanumerics.
STRIPE_SECRET_KEY_RE = re.compile(r"\bsk_(?:live|test)_[A-Za-z0-9]{16,}\b")
# "ghp_/gho_/ghu_/ghs_/ghr_" plus 36-255 word characters, or "github_pat_" plus 22-255.
GITHUB_TOKEN_RE = re.compile(r"\b(?:gh[pousr]_[A-Za-z0-9_]{36,255}|github_pat_[A-Za-z0-9_]{22,255})\b")
# Zero-width lookahead: group 1 captures 13-19 digits, each optionally followed
# by a space or hyphen, when preceded by start-of-text or a non-digit and not
# followed by another digit. Candidates are validated by _has_credit_card.
CREDIT_CARD_CANDIDATE_RE = re.compile(r"(?=(?:^|[^\d])((?:\d[ -]?){13,19})(?!\d))")
# Three dot-separated segments drawn from the base64url alphabet, with bounded
# lengths. Candidates are validated by _has_high_entropy_jwt.
JWT_CANDIDATE_RE = re.compile(r"\b[A-Za-z0-9_-]{10,512}\.[A-Za-z0-9_-]{10,4096}\.[A-Za-z0-9_-]{16,1024}\b")

# Texts longer than MAX_SCAN_CHARS are reduced by _bounded_scan_text to their
# first and last EDGE_SCAN_CHARS characters before scanning.
MAX_SCAN_CHARS = 256 * 1024
EDGE_SCAN_CHARS = MAX_SCAN_CHARS // 2
# Upper bound on regex candidates examined by the credit-card and JWT detectors.
MAX_STRUCTURED_CANDIDATES = 256
# Minimum Shannon entropy (bits per character) required of an AWS key match.
HIGH_ENTROPY_MIN_SCORE = 3.25
# Minimum Shannon entropy (bits per character) required of a JWT's third segment.
JWT_SIGNATURE_MIN_ENTROPY = 3.5

# Canonical tag names; analyze_text returns detected tags in this order.
TAG_ORDER = (
    "PII_EMAIL",
    "PII_SSN",
    "FINANCIAL_CREDIT_CARD",
    "SECRET_AWS_ACCESS_KEY",
    "SECRET_STRIPE",
    "SECRET_GITHUB",
    "SECRET_JWT",
)
45
+
46
+
47
def analyze_text(text: str) -> List[str]:
    """Return the risk tags detected in ``text``, ordered as in ``TAG_ORDER``.

    Falsy input yields an empty list. The text is first passed through
    ``_bounded_scan_text``, so only that bounded view is inspected.
    """
    if not text:
        return []

    sample = _bounded_scan_text(text)

    # Every detector runs once; each outcome is paired with the tag it signals.
    findings = (
        ("PII_EMAIL", EMAIL_RE.search(sample) is not None),
        ("PII_SSN", SSN_RE.search(sample) is not None),
        ("FINANCIAL_CREDIT_CARD", _has_credit_card(sample)),
        ("SECRET_AWS_ACCESS_KEY", _has_aws_access_key(sample)),
        ("SECRET_STRIPE", STRIPE_SECRET_KEY_RE.search(sample) is not None),
        ("SECRET_GITHUB", GITHUB_TOKEN_RE.search(sample) is not None),
        ("SECRET_JWT", _has_high_entropy_jwt(sample)),
    )
    detected = {tag for tag, hit in findings if hit}

    return [tag for tag in TAG_ORDER if tag in detected]
71
+
72
+
73
+ # ---------------------------------------------------------------------------
74
+ # Classifier helpers
75
+ # ---------------------------------------------------------------------------
76
+
77
def _bounded_scan_text(text: str) -> str:
    """Return ``text`` unchanged, or its head and tail when it is too long.

    Inputs longer than ``MAX_SCAN_CHARS`` are cut down to the first and last
    ``EDGE_SCAN_CHARS`` characters, joined by a newline so the two halves
    cannot fuse into a single token.
    """
    if len(text) > MAX_SCAN_CHARS:
        head = text[:EDGE_SCAN_CHARS]
        tail = text[-EDGE_SCAN_CHARS:]
        return head + "\n" + tail
    return text
81
+
82
+
83
def _has_credit_card(text: str) -> bool:
    """Return True if any candidate digit run in ``text`` validates as a card number.

    At most ``MAX_STRUCTURED_CANDIDATES`` regex candidates are examined; once
    that budget is used up the scan stops and reports False.
    """
    for index, candidate in enumerate(CREDIT_CARD_CANDIDATE_RE.finditer(text)):
        if index >= MAX_STRUCTURED_CANDIDATES:
            # Budget exhausted without a hit.
            break
        if _is_likely_credit_card(_digits_only(candidate.group(1))):
            return True
    return False
93
+
94
+
95
def _is_likely_credit_card(digits: str) -> bool:
    """Return True when ``digits`` has a card-like length, a known prefix and a valid Luhn checksum."""
    if not 13 <= len(digits) <= 19:
        return False
    if not _has_known_card_prefix(digits):
        return False
    return _passes_luhn(digits)
97
+
98
+
99
def _has_known_card_prefix(digits: str) -> bool:
    """Return True when ``digits`` pairs an accepted leading prefix with an allowed length.

    NOTE(review): the prefix/length combinations look like the published
    issuer ranges for Visa, American Express, Mastercard, Discover and JCB —
    confirm against current IIN tables.
    """
    size = len(digits)
    lead2, lead3, lead4, lead6 = (_safe_int(digits[:width]) for width in (2, 3, 4, 6))

    # Leading "4" with 13, 16 or 19 digits.
    if size in (13, 16, 19) and digits.startswith("4"):
        return True

    # 15 digits starting 34 or 37.
    if size == 15 and lead2 in (34, 37):
        return True

    # 16 digits starting 51-55 or 2221-2720.
    if size == 16 and (51 <= lead2 <= 55 or 2221 <= lead4 <= 2720):
        return True

    # 16 or 19 digits starting 6011, 65, 644-649 or 622126-622925.
    if size in (16, 19):
        if (
            digits.startswith(("6011", "65"))
            or 644 <= lead3 <= 649
            or 622126 <= lead6 <= 622925
        ):
            return True

    # 16 to 19 digits starting 3528-3589.
    return 16 <= size <= 19 and 3528 <= lead4 <= 3589
120
+
121
+
122
+ def _passes_luhn(digits: str) -> bool:
123
+ total = 0
124
+ should_double = False
125
+ for char in reversed(digits):
126
+ digit = ord(char) - 48
127
+ if digit < 0 or digit > 9:
128
+ return False
129
+ if should_double:
130
+ digit *= 2
131
+ if digit > 9:
132
+ digit -= 9
133
+ total += digit
134
+ should_double = not should_double
135
+ return total % 10 == 0
136
+
137
+
138
def _has_aws_access_key(text: str) -> bool:
    """Return True if ``text`` holds an AWS-key-shaped token with enough entropy.

    A regex match only counts when its Shannon entropy reaches
    ``HIGH_ENTROPY_MIN_SCORE``.
    """
    return any(
        _shannon_entropy(hit.group(0)) >= HIGH_ENTROPY_MIN_SCORE
        for hit in AWS_ACCESS_KEY_RE.finditer(text)
    )
143
+
144
+
145
def _has_high_entropy_jwt(text: str) -> bool:
    """Return True if any JWT-shaped candidate in ``text`` passes ``_is_high_entropy_jwt``.

    At most ``MAX_STRUCTURED_CANDIDATES`` candidates are examined; once that
    budget is used up the scan stops and reports False.
    """
    for index, candidate in enumerate(JWT_CANDIDATE_RE.finditer(text)):
        if index >= MAX_STRUCTURED_CANDIDATES:
            # Budget exhausted without a hit.
            break
        if _is_high_entropy_jwt(candidate.group(0)):
            return True
    return False
154
+
155
+
156
def _is_high_entropy_jwt(token: str) -> bool:
    """Return True when ``token`` has three segments, a JWT-like header and a high-entropy last segment."""
    segments = token.split(".")
    if len(segments) != 3:
        return False

    header, _middle, signature = segments
    if not _looks_like_jwt_header(header):
        return False

    return _shannon_entropy(signature) >= JWT_SIGNATURE_MIN_ENTROPY
161
+
162
+
163
def _looks_like_jwt_header(segment: str) -> bool:
    """Return True when the base64url-decoded ``segment`` mentions both ``"alg"`` and ``"typ"`` keys."""
    header_text = _base64url_decode(segment)
    return all(marker in header_text for marker in ('"alg"', '"typ"'))
166
+
167
+
168
+ def _base64url_decode(value: str) -> str:
169
+ normalized = value.replace("-", "+").replace("_", "/")
170
+ padding = (4 - len(normalized) % 4) % 4
171
+ try:
172
+ return base64.b64decode(normalized + "=" * padding).decode("utf-8", errors="replace")
173
+ except Exception:
174
+ return ""
175
+
176
+
177
+ def _shannon_entropy(value: str) -> float:
178
+ if not value:
179
+ return 0.0
180
+ counts: Dict[str, int] = {}
181
+ for char in value:
182
+ counts[char] = counts.get(char, 0) + 1
183
+ entropy = 0.0
184
+ length = len(value)
185
+ for count in counts.values():
186
+ probability = count / length
187
+ entropy -= probability * math.log2(probability)
188
+ return entropy
189
+
190
+
191
+ def _digits_only(value: str) -> str:
192
+ return "".join(c for c in value if "0" <= c <= "9")
193
+
194
+
195
+ def _safe_int(value: str) -> int:
196
+ return int(value) if value.isdigit() else -1
197
+
198
+
199
+ def _extract_messages_text(messages: Any) -> str:
200
+ """Recursively extract all text content from an OpenAI messages list."""
201
+ if not messages or not isinstance(messages, list):
202
+ return ""
203
+ chunks: List[str] = []
204
+ for msg in messages:
205
+ if not isinstance(msg, dict):
206
+ continue
207
+ role = msg.get("role")
208
+ if isinstance(role, str):
209
+ chunks.append(role)
210
+ content = msg.get("content")
211
+ if isinstance(content, str):
212
+ chunks.append(content)
213
+ elif isinstance(content, list):
214
+ for part in content:
215
+ if isinstance(part, dict):
216
+ for key in ("text", "input", "content"):
217
+ val = part.get(key)
218
+ if isinstance(val, str):
219
+ chunks.append(val)
220
+ return "\n".join(chunks)
221
+
222
+
223
+ def _extract_completion_text(response: Any) -> str:
224
+ """Extract text from an OpenAI ChatCompletion response."""
225
+ choices = getattr(response, "choices", None)
226
+ if not choices:
227
+ return ""
228
+ chunks: List[str] = []
229
+ for choice in choices:
230
+ message = getattr(choice, "message", None)
231
+ if message is not None:
232
+ content = getattr(message, "content", None)
233
+ if isinstance(content, str):
234
+ chunks.append(content)
235
+ return "\n".join(chunks)
236
+
237
+
238
+ # ---------------------------------------------------------------------------
239
+ # Async logger — single worker thread + bounded queue (never blocks caller)
240
+ # ---------------------------------------------------------------------------
241
+
242
# Default endpoint that _AsyncLogger POSTs telemetry payloads to. The wrappers
# also derive the budget-check URL from it by replacing the last path segment.
INGEST_URL = "https://agentwatch-edge-proxy.agentwatch-proxy.workers.dev/v1/ingest"
# Default capacity of _AsyncLogger's pending-payload queue.
MAX_PENDING_LOGS = 1024
244
+
245
+
246
class AgentBudgetExceeded(Exception):
    """Raised when the budget check reports that a session has exceeded its limit.

    The session id and the spent/limit amounts (formatted as dollars in the
    message) are kept on the instance as ``session_id``, ``spent`` and ``limit``.
    """

    def __init__(self, session_id: str, spent: float, limit: float) -> None:
        message = (
            f"Agent budget exceeded for session '{session_id}': "
            f"spent ${spent:.4f}, limit ${limit:.4f}"
        )
        super().__init__(message)
        self.session_id = session_id
        self.spent = spent
        self.limit = limit
253
+
254
+
255
class AgentBudgetCheckUnavailable(Exception):
    """Raised when the budget check fails and fail-open enforcement is disabled.

    The session id and a textual reason are kept on the instance as
    ``session_id`` and ``reason``.
    """

    def __init__(self, session_id: str, reason: str) -> None:
        message = f"AgentWatch budget check unavailable for session '{session_id}': {reason}"
        super().__init__(message)
        self.session_id = session_id
        self.reason = reason
261
+
262
+
263
class _AsyncLogger:
    """Ships telemetry payloads from one lazily started daemon thread.

    Payloads are handed over through a bounded queue. ``submit`` uses
    ``put_nowait``, so a full queue drops the payload (logged at debug level)
    instead of waiting. Delivery errors are logged at debug level and never
    propagate to the caller.
    """

    def __init__(self, api_key: str, ingest_url: str = INGEST_URL, timeout: float = 2.0, max_pending: int = MAX_PENDING_LOGS) -> None:
        self._api_key = api_key
        self._ingest_url = ingest_url
        self._timeout = timeout
        self._queue: queue.Queue[Dict[str, Any]] = queue.Queue(maxsize=max_pending)
        # Guarded by _lock so only one worker thread is ever started.
        self._started = False
        self._lock = threading.Lock()

    def submit(self, payload: Dict[str, Any]) -> None:
        """Queue ``payload`` for delivery; any failure is logged and swallowed."""
        try:
            self._ensure_started()
            self._queue.put_nowait(payload)
        except Exception:
            _log.debug("agentwatch: failed to enqueue log", exc_info=True)

    def _ensure_started(self) -> None:
        """Start the worker thread on first use."""
        if not self._started:
            with self._lock:
                # Re-check under the lock: another caller may have won the race.
                if not self._started:
                    worker = threading.Thread(target=self._run, name="agentwatch-logger", daemon=True)
                    worker.start()
                    self._started = True

    def _run(self) -> None:
        """Worker loop: take payloads off the queue and send them, forever."""
        while True:
            try:
                self._send(self._queue.get())
            except Exception:
                _log.debug("agentwatch: log processing error", exc_info=True)
            finally:
                # Mark the item handled; errors from task_done are ignored.
                try:
                    self._queue.task_done()
                except Exception:
                    pass

    def _send(self, payload: Dict[str, Any]) -> None:
        """POST ``payload`` as JSON to the ingest URL with a bearer token."""
        auth_headers = {"Authorization": f"Bearer {self._api_key}"}
        try:
            httpx.post(
                self._ingest_url,
                json=payload,
                headers=auth_headers,
                timeout=self._timeout,
            )
        except Exception:
            _log.debug("agentwatch: failed to send telemetry", exc_info=True)
314
+
315
+
316
+ # ---------------------------------------------------------------------------
317
+ # Public API
318
+ # ---------------------------------------------------------------------------
319
+
320
class WatchedOpenAI(_OpenAI):
    """Drop-in replacement for ``openai.OpenAI`` that reports telemetry for chat completions.

    On construction, this instance's ``chat.completions.create`` is replaced
    by ``_watched_create``. For every call it optionally performs a session
    budget check, scans prompt and completion text with ``analyze_text``, and
    submits a payload (model, latency, token counts, risk tags, session data)
    to a background ``_AsyncLogger``. Only the risk tag names are sent, not
    the scanned text.
    """

    def __init__(
        self,
        *args: Any,
        agentwatch_api_key: str,
        agentwatch_project: Optional[str] = None,
        agentwatch_team: Optional[str] = None,
        agentwatch_session_id: Optional[str] = None,
        agentwatch_session_budget_usd: Optional[float] = None,
        agentwatch_monthly_budget_usd: Optional[float] = None,
        agentwatch_enforcement_mode: bool = False,
        agentwatch_enforcement_fail_open: bool = True,
        ingest_url: str = INGEST_URL,
        timeout_seconds: float = 2.0,
        **kwargs: Any,
    ) -> None:
        """Create the OpenAI client and install the telemetry wrapper.

        Args:
            *args, **kwargs: forwarded unchanged to ``openai.OpenAI``.
            agentwatch_api_key: bearer token sent to the ingest and budget endpoints.
            agentwatch_project: optional project label added to each payload.
            agentwatch_team: optional team label added to each payload.
            agentwatch_session_id: session identifier; a random UUID4 is used when omitted.
            agentwatch_session_budget_usd: limit passed to the budget check; the
                check only runs when this is set and enforcement mode is on.
            agentwatch_monthly_budget_usd: stored on the instance; not read
                anywhere in this class.
            agentwatch_enforcement_mode: enable the pre-call budget check.
            agentwatch_enforcement_fail_open: when True, a failed budget check
                is logged and the call proceeds; when False it raises
                ``AgentBudgetCheckUnavailable``.
            ingest_url: telemetry endpoint; the budget endpoint is derived from it.
            timeout_seconds: timeout for telemetry and budget HTTP requests.
        """
        super().__init__(*args, **kwargs)
        self._logger = _AsyncLogger(api_key=agentwatch_api_key, ingest_url=ingest_url, timeout=timeout_seconds)
        self._project = agentwatch_project
        self._team = agentwatch_team
        self._session_id = agentwatch_session_id or str(uuid.uuid4())
        self._session_budget_usd = agentwatch_session_budget_usd
        self._monthly_budget_usd = agentwatch_monthly_budget_usd
        self._enforcement_mode = agentwatch_enforcement_mode
        self._enforcement_fail_open = agentwatch_enforcement_fail_open
        self._iteration_index = 0
        # Keep the real method, then shadow it on this instance only.
        self._orig_create = self.chat.completions.create
        self.chat.completions.create = self._watched_create

    def _agentwatch_check_budget(self) -> None:
        """Ask the budget endpoint whether this session may make another call.

        Raises:
            AgentBudgetExceeded: the endpoint reported ``exceeded``.
            AgentBudgetCheckUnavailable: the check failed and fail-open is off.
        """
        base_url = self._logger._ingest_url.rsplit("/", 1)[0]
        try:
            # Pass the values via ``params`` so httpx URL-encodes them; a
            # session id containing "&", "#" or spaces would otherwise
            # corrupt the query string.
            resp = httpx.get(
                f"{base_url}/budget-check",
                params={
                    "session_id": self._session_id,
                    "limit_usd": self._session_budget_usd,
                },
                headers={"Authorization": f"Bearer {self._logger._api_key}"},
                timeout=self._logger._timeout,
            )
            resp.raise_for_status()
            status = resp.json()
            exceeded = bool(status.get("exceeded"))
            spent = status.get("spent_usd", 0.0)
            limit = status.get("limit_usd", self._session_budget_usd)
        except Exception as e:
            if not self._enforcement_fail_open:
                raise AgentBudgetCheckUnavailable(session_id=self._session_id, reason=str(e)) from e
            _log.debug("agentwatch: budget check failed, failing open", exc_info=True)
            return

        if not exceeded:
            return

        # Non-numeric amounts would break the exception's message formatting,
        # which previously got swallowed as a fail-open; fall back instead.
        if not isinstance(spent, (int, float)):
            spent = 0.0
        if not isinstance(limit, (int, float)):
            limit = self._session_budget_usd
        raise AgentBudgetExceeded(session_id=self._session_id, spent=spent, limit=limit)

    def _watched_create(self, *args: Any, **kwargs: Any) -> Any:
        """Call the original ``create`` and submit telemetry for the call.

        Only the ``messages`` and ``model`` keyword arguments are inspected.
        The response is returned unchanged. Exceptions from the original
        ``create`` propagate and no telemetry is submitted for that call.

        NOTE(review): a response without ``choices``/``usage`` attributes
        (presumably a streaming response) is reported with no completion
        risks and zero token counts — confirm this is acceptable.
        """
        if self._enforcement_mode and self._session_id and self._session_budget_usd is not None:
            self._agentwatch_check_budget()

        prompt_text = _extract_messages_text(kwargs.get("messages"))
        prompt_risks = set(analyze_text(prompt_text))

        # perf_counter is monotonic; wall-clock time can jump and yield
        # negative or inflated latencies.
        started = time.perf_counter()
        response = self._orig_create(*args, **kwargs)
        latency = int((time.perf_counter() - started) * 1000)

        # Scan completion text for leaked PII/secrets
        completion_text = _extract_completion_text(response)
        completion_risks = set(analyze_text(completion_text))
        all_risks = prompt_risks | completion_risks
        combined_risks = [tag for tag in TAG_ORDER if tag in all_risks]

        usage = getattr(response, "usage", None)
        prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0
        completion_tokens = getattr(usage, "completion_tokens", 0) or 0

        payload: Dict[str, Any] = {
            "model": kwargs.get("model", "unknown"),
            "latency_ms": latency,
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "identified_risks": combined_risks,
        }
        if self._project:
            payload["project"] = self._project
        if self._team:
            payload["team"] = self._team
        if self._session_id:
            payload["session_id"] = self._session_id
            # 1-based counter of calls made through this client.
            self._iteration_index += 1
            payload["iteration_index"] = self._iteration_index

        self._logger.submit(payload)

        return response
412
+
413
+
414
def wrap(
    openai_client: Any,
    *,
    agentwatch_api_key: str,
    agentwatch_project: Optional[str] = None,
    agentwatch_team: Optional[str] = None,
    agentwatch_session_id: Optional[str] = None,
    agentwatch_session_budget_usd: Optional[float] = None,
    agentwatch_monthly_budget_usd: Optional[float] = None,
    agentwatch_enforcement_mode: bool = False,
    agentwatch_enforcement_fail_open: bool = True,
    ingest_url: str = INGEST_URL,
    timeout_seconds: float = 2.0,
) -> Any:
    """Add AgentWatch telemetry to an existing OpenAI client and return it.

    The client is modified in place: its ``chat.completions.create`` is
    replaced by a wrapper that optionally performs a session budget check,
    scans prompt and completion text with ``analyze_text``, and submits a
    payload to a background ``_AsyncLogger``. The same client object is returned.

    Args:
        openai_client: object exposing ``chat.completions.create``.
        agentwatch_api_key: bearer token sent to the ingest and budget endpoints.
        agentwatch_project: optional project label added to each payload.
        agentwatch_team: optional team label added to each payload.
        agentwatch_session_id: session identifier; a random UUID4 is used when omitted.
        agentwatch_session_budget_usd: limit passed to the budget check; the
            check only runs when this is set and enforcement mode is on.
        agentwatch_monthly_budget_usd: accepted but not used by this function.
        agentwatch_enforcement_mode: enable the pre-call budget check.
        agentwatch_enforcement_fail_open: when True, a failed budget check is
            logged and the call proceeds; when False the wrapper raises
            ``AgentBudgetCheckUnavailable``.
        ingest_url: telemetry endpoint; the budget endpoint is derived from it.
        timeout_seconds: timeout for telemetry and budget HTTP requests.

    Raises (from the installed wrapper, at call time):
        AgentBudgetExceeded: the budget endpoint reported ``exceeded``.
        AgentBudgetCheckUnavailable: the check failed and fail-open is off.
    """
    logger = _AsyncLogger(api_key=agentwatch_api_key, ingest_url=ingest_url, timeout=timeout_seconds)
    session_id = agentwatch_session_id or str(uuid.uuid4())
    # 1-based counter of calls made through the wrapper (rebound via nonlocal).
    iteration_index = 0
    orig_create = openai_client.chat.completions.create

    def check_budget() -> None:
        """Ask the budget endpoint whether this session may make another call."""
        base_url = logger._ingest_url.rsplit("/", 1)[0]
        try:
            # Pass the values via ``params`` so httpx URL-encodes them; a
            # session id containing "&", "#" or spaces would otherwise
            # corrupt the query string.
            resp = httpx.get(
                f"{base_url}/budget-check",
                params={
                    "session_id": session_id,
                    "limit_usd": agentwatch_session_budget_usd,
                },
                headers={"Authorization": f"Bearer {logger._api_key}"},
                timeout=logger._timeout,
            )
            resp.raise_for_status()
            status = resp.json()
            exceeded = bool(status.get("exceeded"))
            spent = status.get("spent_usd", 0.0)
            limit = status.get("limit_usd", agentwatch_session_budget_usd)
        except Exception as e:
            if not agentwatch_enforcement_fail_open:
                raise AgentBudgetCheckUnavailable(session_id=session_id, reason=str(e)) from e
            _log.debug("agentwatch: budget check failed, failing open", exc_info=True)
            return

        if not exceeded:
            return

        # Non-numeric amounts would break the exception's message formatting,
        # which previously got swallowed as a fail-open; fall back instead.
        if not isinstance(spent, (int, float)):
            spent = 0.0
        if not isinstance(limit, (int, float)):
            limit = agentwatch_session_budget_usd
        raise AgentBudgetExceeded(session_id=session_id, spent=spent, limit=limit)

    def watched_create(*args: Any, **kwargs: Any) -> Any:
        """Call the original ``create`` and submit telemetry for the call.

        Only the ``messages`` and ``model`` keyword arguments are inspected.
        The response is returned unchanged.
        """
        nonlocal iteration_index

        if agentwatch_enforcement_mode and session_id and agentwatch_session_budget_usd is not None:
            check_budget()

        prompt_text = _extract_messages_text(kwargs.get("messages"))
        prompt_risks = set(analyze_text(prompt_text))

        # perf_counter is monotonic; wall-clock time can jump and yield
        # negative or inflated latencies.
        started = time.perf_counter()
        response = orig_create(*args, **kwargs)
        latency = int((time.perf_counter() - started) * 1000)

        # Scan completion text for leaked PII/secrets.
        completion_text = _extract_completion_text(response)
        completion_risks = set(analyze_text(completion_text))
        all_risks = prompt_risks | completion_risks
        combined_risks = [tag for tag in TAG_ORDER if tag in all_risks]

        usage = getattr(response, "usage", None)
        prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0
        completion_tokens = getattr(usage, "completion_tokens", 0) or 0

        payload: Dict[str, Any] = {
            "model": kwargs.get("model", "unknown"),
            "latency_ms": latency,
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "identified_risks": combined_risks,
        }
        if agentwatch_project:
            payload["project"] = agentwatch_project
        if agentwatch_team:
            payload["team"] = agentwatch_team
        if session_id:
            payload["session_id"] = session_id
            iteration_index += 1
            payload["iteration_index"] = iteration_index

        logger.submit(payload)

        return response

    openai_client.chat.completions.create = watched_create
    return openai_client
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: aw-sdk
3
+ Version: 0.1.0
4
+ Summary: Lightweight drop-in wrapper for the OpenAI Python client that logs telemetry and detects PII risks.
5
+ License: MIT
6
+ Project-URL: Homepage, https://github.com/agentwatch/agentwatch
7
+ Requires-Python: >=3.9
8
+ Requires-Dist: openai
9
+ Requires-Dist: httpx
@@ -0,0 +1,6 @@
1
+ agentwatch/__init__.py,sha256=R67VdcBDPU3C9yOsUr1eqCOl9w3WYEDobvRsj4BW18k,260
2
+ agentwatch/wrapper.py,sha256=sdy1sxXBiO2DsD9hQtBr36cOS7RK-BreoyKDil6idt0,18222
3
+ aw_sdk-0.1.0.dist-info/METADATA,sha256=gtfRaAH-Xvs2nZ6DnLQsZKrxWMe-G-SFIRc0h9KUmfM,302
4
+ aw_sdk-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
5
+ aw_sdk-0.1.0.dist-info/top_level.txt,sha256=IML7iKg-Q0BkJznZO5NsCBtfy5WUnk3Z4q7HzdK2keI,11
6
+ aw_sdk-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ agentwatch