dapplepot-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,243 @@
1
+ import logging
2
+ import random
3
+
4
+ import requests
5
+
6
+ from dapplepot_sdk._adapter import TraceAdapter
7
+ from dapplepot_sdk._buffer import EventBuffer
8
+ from dapplepot_sdk._interceptor import OnlineCheckInterceptor
9
+
10
# Library convention: attach a NullHandler so the package stays silent unless
# the host application configures logging itself.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

# Sub-check IDs that may be evaluated online. When the per-agent config is
# loaded, server overrides for any ID outside this set are ignored.
_ONLINE_CAPABLE_SUB_CHECKS: frozenset[str] = frozenset((
    # PI-*
    'PI-01a', 'PI-01b', 'PI-01c', 'PI-02a', 'PI-05a', 'PI-08a',
    # SID-*
    'SID-01a', 'SID-01c', 'SID-02a',
    # IOH-*
    'IOH-01a',
    # EA-*
    'EA-01a', 'EA-02b',
))
19
+
20
+
21
class DapplePotBlockedError(Exception):
    """Error describing a blocked operation.

    The exception message is rendered as ``[<signal>] <reason>``; the three
    constructor arguments are also kept as attributes of the same names.
    """

    def __init__(self, signal: str, reason: str, session_id: str):
        message = f'[{signal}] {reason}'
        super().__init__(message)
        self.session_id = session_id
        self.reason = reason
        self.signal = signal
27
+
28
+
29
class DapplePotSessionTerminatedError(Exception):
    """Exception type for a terminated session.

    Not raised anywhere in this module.
    NOTE(review): presumably raised by the interceptor / session code when a
    session is terminated — confirm against those modules.
    """
31
+
32
+
33
+ class DapplePot:
34
+ def __init__(
35
+ self,
36
+ sdk_key: str,
37
+ tenant_id: str,
38
+ agent_id: str,
39
+ ingest_url: str,
40
+ *,
41
+ sample_rate: float = 1.0,
42
+ pii_scrubber=None,
43
+ redact_keys: list = None,
44
+ flush_interval_ms: int = 500,
45
+ flush_batch_size: int = 100,
46
+ ):
47
+ self._sdk_key = sdk_key
48
+ self._tenant_id = tenant_id
49
+ self._agent_id = agent_id
50
+ self._ingest_url = ingest_url.rstrip('/')
51
+ self._sample_rate = sample_rate
52
+ self._pii_scrubber = pii_scrubber
53
+ self._redact_keys = set(redact_keys or [])
54
+ self._tool_allowlist = None
55
+
56
+ check_actions = self._fetch_check_actions()
57
+
58
+ self._buffer = EventBuffer(
59
+ ingest_url=self._ingest_url,
60
+ sdk_key=self._sdk_key,
61
+ flush_interval_ms=flush_interval_ms,
62
+ flush_batch_size=flush_batch_size,
63
+ )
64
+ self._interceptor = OnlineCheckInterceptor(
65
+ check_actions=check_actions,
66
+ buffer=self._buffer,
67
+ client=self,
68
+ )
69
+
70
+ tool_manifest, max_tool_calls = self._fetch_tool_manifest()
71
+ ea01a_action = check_actions.get('EA-01a', 'block_call')
72
+ ea02b_action = check_actions.get('EA-02b', 'alert')
73
+ self._interceptor.set_tool_manifest(
74
+ manifest=tool_manifest,
75
+ action=ea01a_action,
76
+ max_tool_calls=max_tool_calls,
77
+ ea02b_action=ea02b_action,
78
+ )
79
+
80
+ self._framework = 'unknown'
81
+
82
+ # ── startup ───────────────────────────────────────────────────────────────
83
+
84
+ def _fetch_check_actions(self) -> dict[str, str]:
85
+ """Pull per-subcheck online config from the API. Returns {sub_check_id: action}."""
86
+ url = f'{self._ingest_url}/v1/sdk/security/agents/{self._agent_id}/subcheck-config'
87
+ try:
88
+ resp = requests.get(
89
+ url,
90
+ headers={'Authorization': f'Bearer {self._sdk_key}'},
91
+ timeout=5,
92
+ )
93
+ resp.raise_for_status()
94
+ overrides: dict = resp.json().get('overrides', {})
95
+ check_actions = {}
96
+ for sub_check_id, cfg in overrides.items():
97
+ if cfg.get('online_detection') and sub_check_id in _ONLINE_CAPABLE_SUB_CHECKS:
98
+ check_actions[sub_check_id] = cfg.get('action', 'alert')
99
+ logger.debug('Loaded %d online checks from API', len(check_actions))
100
+ return check_actions
101
+ except Exception as exc:
102
+ logger.warning(
103
+ 'Could not fetch online check config from %s: %s — online checks disabled',
104
+ url, exc,
105
+ )
106
+ return {}
107
+
108
+ def _fetch_tool_manifest(self) -> tuple[list[str], int | None]:
109
+ """Fetch the tool manifest and max_tool_calls_per_session from the API."""
110
+ url = f'{self._ingest_url}/v1/sdk/security/agents/{self._agent_id}/tool-manifest'
111
+ try:
112
+ resp = requests.get(
113
+ url,
114
+ headers={'Authorization': f'Bearer {self._sdk_key}'},
115
+ timeout=5,
116
+ )
117
+ resp.raise_for_status()
118
+ data = resp.json()
119
+ manifest = data.get('tool_manifest') or []
120
+ max_calls = data.get('max_tool_calls_per_session')
121
+ logger.debug('Loaded tool manifest (%d tools, max_calls=%s)', len(manifest), max_calls)
122
+ return manifest, max_calls
123
+ except Exception as exc:
124
+ logger.warning(
125
+ 'Could not fetch tool manifest from %s: %s — EA-01a/EA-02b disabled',
126
+ url, exc,
127
+ )
128
+ return [], None
129
+
130
+ # ── internal helpers ──────────────────────────────────────────────────────
131
+
132
+ def _adapter(self, framework: str) -> TraceAdapter:
133
+ return TraceAdapter(
134
+ tenant_id=self._tenant_id,
135
+ agent_id=self._agent_id,
136
+ framework=framework,
137
+ )
138
+
139
+ def _should_sample(self) -> bool:
140
+ return random.random() < self._sample_rate
141
+
142
+ def _scrub(self, event: dict) -> dict:
143
+ if not self._pii_scrubber and not self._redact_keys:
144
+ return event
145
+ payload = event.get('payload', {})
146
+ if self._pii_scrubber:
147
+ payload = self._pii_scrubber.scrub_value(payload)
148
+ if self._redact_keys:
149
+ payload = self._redact_keys_in(payload)
150
+ return {**event, 'payload': payload}
151
+
152
+ def _redact_keys_in(self, obj):
153
+ if isinstance(obj, dict):
154
+ return {k: '[REDACTED]' if k in self._redact_keys else self._redact_keys_in(v)
155
+ for k, v in obj.items()}
156
+ if isinstance(obj, list):
157
+ return [self._redact_keys_in(i) for i in obj]
158
+ return obj
159
+
160
+ def _process_event(self, event: dict) -> None:
161
+ event = self._interceptor.evaluate(event)
162
+ event = self._scrub(event)
163
+ self._buffer.push(event)
164
+
165
+ # ── public API ────────────────────────────────────────────────────────────
166
+
167
+ def instrument_anthropic(self) -> None:
168
+ """Patch the Anthropic SDK so all messages.create() calls are traced automatically.
169
+
170
+ Call once after DapplePot() is initialised, before creating your Anthropic client.
171
+ The standard anthropic package is unaffected — upgrade it freely at any time.
172
+
173
+ Usage::
174
+
175
+ import anthropic
176
+ from dapplepot_sdk import DapplePot
177
+
178
+ dp = DapplePot(...)
179
+ dp.instrument_anthropic()
180
+
181
+ client = anthropic.Anthropic(api_key="...")
182
+ """
183
+ from dapplepot_sdk import anthropic as _dp_anth
184
+ _dp_anth._patch(self)
185
+ self._framework = 'anthropic'
186
+
187
+ def instrument_openai(self) -> None:
188
+ """Patch the OpenAI SDK so all chat.completions.create() calls are traced automatically.
189
+
190
+ Call once after DapplePot() is initialised, before creating your OpenAI client.
191
+
192
+ Usage::
193
+
194
+ import openai
195
+ from dapplepot_sdk import DapplePot
196
+
197
+ dp = DapplePot(...)
198
+ dp.instrument_openai()
199
+
200
+ client = openai.OpenAI(api_key="...")
201
+ """
202
+ from dapplepot_sdk import openai as _dp_openai
203
+ _dp_openai._patch(self)
204
+ self._framework = 'openai'
205
+
206
+ def callback_handler(self, session_id: str = None, user_context_id: str = None,
207
+ user_tenant_id: str = None):
208
+ """Return a fresh LangChain/LangGraph CallbackHandler for one session."""
209
+ from dapplepot_sdk._langchain import DapplePotCallbackHandler
210
+ self._framework = 'langchain'
211
+ return DapplePotCallbackHandler(self, session_id=session_id,
212
+ user_context_id=user_context_id,
213
+ user_tenant_id=user_tenant_id)
214
+
215
+ def session(self, session_id: str = None, user_context_id: str = None,
216
+ user_tenant_id: str = None):
217
+ """Context manager that wraps OpenAI / Anthropic calls in a DapplePot session."""
218
+ from dapplepot_sdk.session import SessionContext
219
+ return SessionContext(self, session_id=session_id, user_context_id=user_context_id,
220
+ user_tenant_id=user_tenant_id)
221
+
222
+ def node(self, node_name: str, input=None):
223
+ """Context manager to trace a named step inside an active dp.session().
224
+
225
+ Emits node_start on enter and node_end / node_error on exit.
226
+ Use this to add structure to your agent loop — it is entirely optional.
227
+
228
+ Usage::
229
+
230
+ with dp.node("retrieval", input=query):
231
+ docs = vector_store.search(query)
232
+
233
+ with dp.node("call_model"):
234
+ response = client.messages.create(...)
235
+ """
236
+ from dapplepot_sdk._node_context import NodeContext
237
+ from dapplepot_sdk.session import get_current_session_id
238
+ return NodeContext(self, session_id=get_current_session_id(),
239
+ node_name=node_name, input=input)
240
+
241
+ def shutdown(self, timeout_ms: int = 5000) -> None:
242
+ """Flush remaining events and stop background threads."""
243
+ self._buffer.shutdown(timeout_ms=timeout_ms)
@@ -0,0 +1,168 @@
1
+ import datetime
2
+ import uuid
3
+
4
+
5
def _now() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SS.mmmZ``."""
    current = datetime.datetime.now(datetime.timezone.utc)
    stamp = current.strftime('%Y-%m-%dT%H:%M:%S.%f')
    # %f renders six microsecond digits; dropping the last three leaves milliseconds.
    return f'{stamp[:-3]}Z'
7
+
8
+
9
def first_user_text(messages: list) -> "str | None":
    """Return the first non-empty user text found in a messages list.

    Only dict entries whose ``role`` is ``"user"`` or ``"human"`` are
    considered. ``content`` may be a plain string, or a list of blocks in which
    the first ``{"type": "text"}`` block with non-empty ``text`` wins. Returns
    ``None`` when nothing matches.
    """
    user_roles = ("user", "human")
    for entry in messages:
        if not isinstance(entry, dict) or entry.get("role") not in user_roles:
            continue
        body = entry.get("content", "")
        if isinstance(body, str):
            if body:
                return body
            # An empty string: keep looking at later messages.
            continue
        if not isinstance(body, list):
            continue
        for part in body:
            if not isinstance(part, dict) or part.get("type") != "text":
                continue
            text = part.get("text", "")
            if text:
                return text
    return None
25
+
26
+
27
class TraceAdapter:
    """Builds event dicts for one (tenant, agent, framework) combination.

    Every public method returns a new dict made of the common envelope from
    ``_base`` plus an event-specific ``payload``. Optional fields are left out
    of the payload entirely rather than being emitted as ``None``.
    """

    def __init__(self, tenant_id: str, agent_id: str, framework: str):
        self._framework = framework
        self._agent_id = agent_id
        self._tenant_id = tenant_id

    @staticmethod
    def _keep_set(**fields) -> dict:
        """Return the given fields minus those that are ``None`` (order kept)."""
        return {name: value for name, value in fields.items() if value is not None}

    @staticmethod
    def _keep_truthy(**fields) -> dict:
        """Return the given fields minus those that are falsy (order kept)."""
        return {name: value for name, value in fields.items() if value}

    def _event(self, session_id: str, event_type: str, payload: dict) -> dict:
        """Create the envelope for *event_type* and attach *payload* to it."""
        envelope = self._base(session_id, event_type)
        envelope['payload'] = payload
        return envelope

    def _base(self, session_id: str, event_type: str) -> dict:
        """Return the common envelope: ids, type, schema version, timestamp,
        a fresh random ``event_id`` and an empty ``payload``."""
        envelope = {}
        envelope['dp_tenant_id'] = self._tenant_id
        envelope['dp_agent_id'] = self._agent_id
        envelope['dp_session_id'] = session_id
        envelope['dp_event_type'] = event_type
        envelope['dp_schema_version'] = '2'
        envelope['dp_sampled'] = True
        envelope['dp_framework'] = self._framework
        envelope['ts'] = _now()
        envelope['event_id'] = str(uuid.uuid4())
        envelope['payload'] = {}
        return envelope

    def session_start(self, session_id: str, user_context_id: str = None, metadata=None,
                      input=None, user_tenant_id: str = None) -> dict:
        """Build a ``session_start`` event.

        Truthy ``user_context_id`` / ``user_tenant_id`` are written both on
        the envelope and inside the payload.
        """
        identity = self._keep_truthy(user_context_id=user_context_id,
                                     user_tenant_id=user_tenant_id)
        payload = {
            'session_id': session_id,
            'framework': self._framework,
            'agent_id': self._agent_id,
            **identity,
            **self._keep_truthy(metadata=metadata),
            **self._keep_set(input=input),
        }
        event = self._base(session_id, 'session_start')
        event.update(identity)
        event['payload'] = payload
        return event

    def session_end(self, session_id: str, output=None, latency_ms=None, total_tokens=None) -> dict:
        """Build a ``session_end`` event; only non-None fields are included."""
        payload = self._keep_set(output=output, latency_ms=latency_ms,
                                 total_tokens=total_tokens)
        return self._event(session_id, 'session_end', payload)

    def session_error(self, session_id: str, error_type: str, error_message: str,
                      traceback: str = None, exit_reason: str = None) -> dict:
        """Build a ``session_error`` event."""
        payload = {
            'error_type': error_type,
            'error_message': error_message,
            **self._keep_truthy(traceback=traceback, exit_reason=exit_reason),
        }
        return self._event(session_id, 'session_error', payload)

    def node_start(self, session_id: str, node_name: str, parent_span_id=None, input=None) -> dict:
        """Build a ``node_start`` event."""
        payload = {
            'node_name': node_name,
            **self._keep_truthy(parent_span_id=parent_span_id),
            **self._keep_set(input=input),
        }
        return self._event(session_id, 'node_start', payload)

    def node_end(self, session_id: str, node_name: str, output=None, latency_ms=None) -> dict:
        """Build a ``node_end`` event."""
        payload = {
            'node_name': node_name,
            **self._keep_set(output=output, latency_ms=latency_ms),
        }
        return self._event(session_id, 'node_end', payload)

    def node_error(self, session_id: str, node_name: str, error_type: str, error_message: str, traceback: str = None) -> dict:
        """Build a ``node_error`` event."""
        payload = {
            'node_name': node_name,
            'error_type': error_type,
            'error_message': error_message,
            **self._keep_truthy(traceback=traceback),
        }
        return self._event(session_id, 'node_error', payload)

    def llm_start(self, session_id: str, model: str, messages: list,
                  temperature=None, max_tokens=None, tools=None) -> dict:
        """Build an ``llm_start`` event; an empty ``tools`` value is omitted."""
        payload = {
            'model': model,
            'messages': messages,
            **self._keep_set(temperature=temperature, max_tokens=max_tokens),
            **self._keep_truthy(tools=tools),
        }
        return self._event(session_id, 'llm_start', payload)

    def llm_end(self, session_id: str, completion: str, model: str | None = None, finish_reason=None, usage=None, latency_ms=None) -> dict:
        """Build an ``llm_end`` event."""
        payload = {
            'completion': completion,
            **self._keep_truthy(model=model, finish_reason=finish_reason, usage=usage),
            **self._keep_set(latency_ms=latency_ms),
        }
        return self._event(session_id, 'llm_end', payload)

    def llm_error(self, session_id: str, model: str | None = None, error_type: str = None,
                  error_message: str = None, latency_ms: int = None) -> dict:
        """Build an ``llm_error`` event."""
        payload = {
            **self._keep_truthy(model=model, error_type=error_type,
                                error_message=error_message),
            **self._keep_set(latency_ms=latency_ms),
        }
        return self._event(session_id, 'llm_error', payload)

    def tool_start(self, session_id: str, tool_name: str, tool_input) -> dict:
        """Build a ``tool_start`` event."""
        payload = {'tool_name': tool_name, 'tool_input': tool_input}
        return self._event(session_id, 'tool_start', payload)

    def tool_error(self, session_id: str, tool_name: str, error_message: str,
                   error_type: str = "ToolError", tool_input=None) -> dict:
        """Build a ``tool_error`` event."""
        payload = {
            'tool_name': tool_name,
            'error_type': error_type,
            'error_message': error_message,
            **self._keep_set(tool_input=tool_input),
        }
        return self._event(session_id, 'tool_error', payload)

    def tool_end(self, session_id: str, tool_name: str, tool_output, latency_ms=None) -> dict:
        """Build a ``tool_end`` event."""
        payload = {
            'tool_name': tool_name,
            'tool_output': tool_output,
            **self._keep_set(latency_ms=latency_ms),
        }
        return self._event(session_id, 'tool_end', payload)
@@ -0,0 +1,115 @@
1
+ import atexit
2
+ import json
3
+ import queue
4
+ import threading
5
+ import time
6
+ import logging
7
+
8
+ import requests
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class _SafeEncoder(json.JSONEncoder):
    """JSON encoder that degrades gracefully on objects json cannot encode.

    Conversion order for an unsupported object: ``model_dump()``, then
    ``dict()``, then its instance ``__dict__``, and finally ``repr(obj)``.
    A converter that raises is skipped instead of aborting serialisation, so
    one odd object does not cost the whole event batch.
    """

    def default(self, obj):
        # A class object exposes model_dump/dict as plain functions; calling
        # them without an instance raises TypeError, so render classes by repr.
        if isinstance(obj, type):
            return repr(obj)
        for method_name in ('model_dump', 'dict'):
            converter = getattr(obj, method_name, None)
            if not callable(converter):
                continue
            try:
                return converter()
            except Exception:
                # Best-effort: fall through to the next strategy.
                continue
        state = getattr(obj, '__dict__', None)
        if isinstance(state, dict):
            return state
        return repr(obj)
22
+
23
+
24
class EventBuffer:
    """Queue of events that a daemon thread posts to the ingest API in batches.

    ``push`` enqueues; the background thread sends up to ``flush_batch_size``
    events every ``flush_interval_ms``. ``flush_sync`` and ``shutdown`` drain
    the queue on the calling thread.

    NOTE: the per-session sampling and sequence maps are never pruned in this
    class, so they grow with the number of distinct session ids seen.
    """

    def __init__(self, ingest_url: str, sdk_key: str,
                 flush_interval_ms: int = 500, flush_batch_size: int = 100):
        """Start the flush thread and register ``shutdown`` to run at exit.

        Args:
            ingest_url: API base URL; ``/v1/ingest/events`` is appended.
            sdk_key: Bearer token for the ingest endpoint.
            flush_interval_ms: Pause between background flushes.
            flush_batch_size: Maximum number of events per POST.
        """
        self._url = ingest_url.rstrip('/') + '/v1/ingest/events'
        self._sdk_key = sdk_key
        self._interval = flush_interval_ms / 1000.0
        self._batch_size = flush_batch_size
        self._queue: queue.Queue = queue.Queue()
        self._session_samples: dict = {}
        self._session_seqs: dict = {}
        self._stop = threading.Event()
        self._thread = threading.Thread(target=self._loop, daemon=True, name='telemetry')
        self._thread.start()
        # Flush remaining events on normal process exit so the daemon thread
        # dying at interpreter shutdown doesn't silently drop buffered events.
        atexit.register(self.shutdown)

    # ── sampling ──────────────────────────────────────────────────────────────

    def set_sampled(self, session_id: str, sampled: bool) -> None:
        """Record whether events of *session_id* should be kept."""
        self._session_samples[session_id] = sampled

    def is_sampled(self, session_id: str) -> bool:
        """Return the recorded decision; sessions never recorded count as sampled."""
        return self._session_samples.get(session_id, True)

    # ── push ──────────────────────────────────────────────────────────────────

    def push(self, event: dict) -> None:
        """Enqueue *event* unless its session was marked as not sampled."""
        sid = event.get('dp_session_id')
        if sid and not self.is_sampled(sid):
            return
        # Stamp a monotonic per-session sequence_index for SDK paths that don't
        # set one (openai, anthropic, session.py). LangChain sets it in _emit()
        # before calling here, so we leave those events unchanged.
        if 'sequence_index' not in event and sid:
            n = self._session_seqs.get(sid, 0)
            self._session_seqs[sid] = n + 1
            # Copy rather than mutate the caller's dict.
            event = {**event, 'sequence_index': n}
        self._queue.put(event)

    def push_sync(self, event: dict) -> None:
        """Flush a single event immediately (used before raising a blocked error)."""
        self._send([event])

    def flush_sync(self) -> None:
        """Drain all queued events immediately on the calling thread.
        Safe to call after shutdown() — push() always enqueues regardless of stop state."""
        # _drain() handles one batch; repeat until it finds nothing to send.
        while self._drain():
            pass

    # ── flush loop ────────────────────────────────────────────────────────────

    def _loop(self) -> None:
        """Background thread body: flush one batch per interval until stopped."""
        # Event.wait() returns True as soon as shutdown() sets the flag, so
        # stopping does not have to sit out the rest of the interval.
        while not self._stop.wait(self._interval):
            self._drain()

    def _drain(self) -> int:
        """Send at most one batch from the queue.

        Returns:
            Number of events taken off the queue (0 when it was empty).
        """
        batch = []
        try:
            while len(batch) < self._batch_size:
                batch.append(self._queue.get_nowait())
        except queue.Empty:
            pass
        if batch:
            self._send(batch)
        return len(batch)

    def _send(self, batch: list, retries: int = 3) -> None:
        """POST *batch* to the ingest endpoint, retrying with exponential backoff.

        Never raises: once every attempt has failed the error is logged and
        the batch is dropped.
        """
        # Serialise once; retrying cannot repair a batch that does not encode.
        try:
            body = json.dumps({'events': batch}, cls=_SafeEncoder)
        except Exception as exc:
            logger.error('event flush failed: could not serialise batch: %s', exc)
            return
        for attempt in range(retries):
            try:
                resp = requests.post(
                    self._url,
                    data=body,
                    headers={
                        'Authorization': f'Bearer {self._sdk_key}',
                        'Content-Type': 'application/json',
                    },
                    timeout=5,
                )
                resp.raise_for_status()
                return
            except Exception as exc:
                if attempt == retries - 1:
                    logger.error('event flush failed after %d retries: %s', retries, exc)
                else:
                    time.sleep(0.1 * (2 ** attempt))

    # ── shutdown ──────────────────────────────────────────────────────────────

    def shutdown(self, timeout_ms: int = 5000) -> None:
        """Stop the flush thread and send what is still queued.

        At least one batch is always attempted; further batches are sent until
        the queue is empty or *timeout_ms* has elapsed, so an unreachable
        endpoint cannot stall process exit indefinitely.
        """
        deadline = time.monotonic() + timeout_ms / 1000.0
        self._stop.set()
        self._thread.join(timeout=timeout_ms / 1000.0)
        while self._drain():
            if time.monotonic() >= deadline:
                break