agentreplay 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
agentreplay/sdk.py ADDED
@@ -0,0 +1,578 @@
1
+ # Copyright 2025 Sushanth (https://github.com/sushanthpy)
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ Agentreplay SDK - Ergonomic top-level API.
17
+
18
+ This module provides the developer-friendly API surface:
19
+ - init() - Initialize SDK from env vars or explicit config
20
+ - get_client() - Get singleton client
21
+ - flush() - Flush pending spans
22
+ - shutdown() - Graceful shutdown
23
+
24
+ Example:
25
+ >>> from agentreplay import init, traceable, wrap_openai, flush
26
+ >>>
27
+ >>> # Initialize (reads env vars by default)
28
+ >>> init()
29
+ >>>
30
+ >>> # Wrap OpenAI for auto-tracing
31
+ >>> from openai import OpenAI
32
+ >>> client = wrap_openai(OpenAI())
33
+ >>>
34
+ >>> # Or use decorator
35
+ >>> @traceable
36
+ ... def my_function():
37
+ ... return "result"
38
+ >>>
39
+ >>> # Flush before exit (serverless)
40
+ >>> flush()
41
+ """
42
+
43
+ import os
44
+ import atexit
45
+ import signal
46
+ import threading
47
+ import logging
48
+ from typing import Optional, Dict, Any, Callable
49
+ from dataclasses import dataclass, field
50
+
51
+ logger = logging.getLogger(__name__)
52
+
53
+ # =============================================================================
54
+ # Global State
55
+ # =============================================================================
56
+
57
@dataclass
class SDKConfig:
    """Resolved SDK configuration.

    Instances are built by ``init()`` from explicit arguments and
    ``AGENTREPLAY_*`` environment variables. The defaults below are the
    values used when neither source supplies a setting. Field order is part
    of the generated constructor signature, so it must not be rearranged.
    """

    # -- Connection ---------------------------------------------------------
    # API key; only validated (strict mode) and masked in debug logs by init().
    api_key: Optional[str] = None
    # Server base URL; init() strips any trailing "/" before storing it.
    base_url: str = "http://localhost:8080"
    # Identifiers forwarded to AgentreplayClient by init().
    tenant_id: int = 1
    project_id: int = 0
    agent_id: int = 1

    # -- Environment --------------------------------------------------------
    # Free-form labels; not consumed elsewhere in this module.
    environment: str = "development"
    service_name: str = "agentreplay-app"

    # -- Behavior -----------------------------------------------------------
    # When False, init() creates no clients.
    enabled: bool = True
    # Enables DEBUG-level logging and extra log lines in this module.
    debug: bool = False
    # When True, init() raises ValueError if no API key is available.
    strict: bool = False

    # -- Batching (forwarded to BatchingAgentreplayClient by init()) --------
    batch_size: int = 100
    flush_interval: float = 5.0
    # Passed to the batching client as ``max_buffer_size``.
    max_queue_size: int = 10000

    # -- Timeouts -----------------------------------------------------------
    # Forwarded to AgentreplayClient(timeout=...).
    timeout: float = 30.0
    # Default timeout used by flush() when the caller passes none.
    flush_timeout: float = 5.0

    # -- Privacy ------------------------------------------------------------
    # NOTE(review): the fields below are stored here but not read in this
    # module; presumably consumed by tracing/wrapper code - confirm.
    capture_input: bool = True
    capture_output: bool = True
    redact_patterns: list = field(default_factory=list)
    max_payload_size: int = 100000
90
+
91
+
92
+ # Global state
93
+ _config: Optional[SDKConfig] = None
94
+ _client = None
95
+ _batching_client = None
96
+ _initialized = False
97
+ _lock = threading.Lock()
98
+
99
+
100
+ # =============================================================================
101
+ # Environment Variable Helpers
102
+ # =============================================================================
103
+
104
+ def _get_env(key: str, default: Optional[str] = None) -> Optional[str]:
105
+ """Get environment variable."""
106
+ return os.environ.get(key, default)
107
+
108
+
109
+ def _get_env_bool(key: str, default: bool = False) -> bool:
110
+ """Get boolean environment variable."""
111
+ val = os.environ.get(key, "").lower()
112
+ if val in ("1", "true", "yes", "on"):
113
+ return True
114
+ if val in ("0", "false", "no", "off"):
115
+ return False
116
+ return default
117
+
118
+
119
+ def _get_env_int(key: str, default: int) -> int:
120
+ """Get integer environment variable."""
121
+ val = os.environ.get(key)
122
+ if val is None:
123
+ return default
124
+ try:
125
+ return int(val)
126
+ except ValueError:
127
+ return default
128
+
129
+
130
+ def _get_env_float(key: str, default: float) -> float:
131
+ """Get float environment variable."""
132
+ val = os.environ.get(key)
133
+ if val is None:
134
+ return default
135
+ try:
136
+ return float(val)
137
+ except ValueError:
138
+ return default
139
+
140
+
141
+ # =============================================================================
142
+ # Initialization
143
+ # =============================================================================
144
+
145
def init(
    *,
    api_key: Optional[str] = None,
    base_url: Optional[str] = None,
    tenant_id: Optional[int] = None,
    project_id: Optional[int] = None,
    agent_id: Optional[int] = None,
    environment: Optional[str] = None,
    service_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    debug: Optional[bool] = None,
    strict: Optional[bool] = None,
    batch_size: Optional[int] = None,
    flush_interval: Optional[float] = None,
    max_queue_size: Optional[int] = None,
    timeout: Optional[float] = None,
    capture_input: Optional[bool] = None,
    capture_output: Optional[bool] = None,
    redact_patterns: Optional[list] = None,
) -> SDKConfig:
    """Initialize the Agentreplay SDK.

    Reads configuration from environment variables by default, with
    explicit parameters taking precedence. Calling ``init()`` again while the
    SDK is already initialized is a no-op that returns the existing config.

    Environment Variables:
        AGENTREPLAY_API_KEY: API key for authentication
        AGENTREPLAY_URL: Base URL (default: http://localhost:8080)
        AGENTREPLAY_TENANT_ID: Tenant ID (default: 1)
        AGENTREPLAY_PROJECT_ID: Project ID (default: 0)
        AGENTREPLAY_AGENT_ID: Agent ID (default: 1)
        AGENTREPLAY_ENVIRONMENT: Environment name (default: development)
        AGENTREPLAY_SERVICE_NAME: Service name (default: agentreplay-app)
        AGENTREPLAY_ENABLED: Enable SDK (default: true)
        AGENTREPLAY_DEBUG: Enable debug logging (default: false)
        AGENTREPLAY_STRICT: Strict mode - throw on missing API key (default: false)
        AGENTREPLAY_BATCH_SIZE: Batch size (default: 100)
        AGENTREPLAY_FLUSH_INTERVAL: Flush interval seconds (default: 5.0)
        AGENTREPLAY_MAX_QUEUE_SIZE: Max queue size (default: 10000)
        AGENTREPLAY_TIMEOUT: Request timeout seconds (default: 30.0)
        AGENTREPLAY_CAPTURE_INPUT: Capture inputs (default: true)
        AGENTREPLAY_CAPTURE_OUTPUT: Capture outputs (default: true)

    Args:
        api_key: API key (overrides env var)
        base_url: Base URL (overrides env var)
        tenant_id: Tenant ID (overrides env var)
        project_id: Project ID (overrides env var)
        agent_id: Agent ID (overrides env var)
        environment: Environment name (overrides env var)
        service_name: Service name (overrides env var)
        enabled: Enable SDK (overrides env var)
        debug: Enable debug logging (overrides env var)
        strict: Strict mode (overrides env var)
        batch_size: Batch size (overrides env var)
        flush_interval: Flush interval (overrides env var)
        max_queue_size: Max queue size (overrides env var)
        timeout: Request timeout (overrides env var)
        capture_input: Capture inputs (overrides env var)
        capture_output: Capture outputs (overrides env var)
        redact_patterns: Patterns to redact

    Returns:
        SDKConfig: Resolved configuration

    Raises:
        ValueError: If strict=True and API key is missing. No global SDK
            state is modified in that case.

    Example:
        >>> # Use environment variables
        >>> init()

        >>> # Explicit configuration
        >>> init(
        ...     api_key="ar_xxx",
        ...     base_url="https://api.agentreplay.dev",
        ...     environment="production",
        ...     debug=True
        ... )
    """
    global _config, _client, _batching_client, _initialized

    with _lock:
        if _initialized:
            logger.debug("SDK already initialized, returning existing config")
            return _config

        # Build the config in a local first so a failed init (strict-mode
        # error, client constructor raising) never leaves half-populated
        # module state behind.
        # For the string settings an empty env var is treated as unset, so
        # e.g. AGENTREPLAY_URL="" still resolves to the default URL.
        config = SDKConfig(
            api_key=api_key or _get_env("AGENTREPLAY_API_KEY"),
            base_url=(
                base_url or _get_env("AGENTREPLAY_URL") or "http://localhost:8080"
            ).rstrip("/"),
            tenant_id=tenant_id if tenant_id is not None else _get_env_int("AGENTREPLAY_TENANT_ID", 1),
            project_id=project_id if project_id is not None else _get_env_int("AGENTREPLAY_PROJECT_ID", 0),
            agent_id=agent_id if agent_id is not None else _get_env_int("AGENTREPLAY_AGENT_ID", 1),
            environment=environment or _get_env("AGENTREPLAY_ENVIRONMENT") or "development",
            service_name=service_name or _get_env("AGENTREPLAY_SERVICE_NAME") or "agentreplay-app",
            enabled=enabled if enabled is not None else _get_env_bool("AGENTREPLAY_ENABLED", True),
            debug=debug if debug is not None else _get_env_bool("AGENTREPLAY_DEBUG", False),
            strict=strict if strict is not None else _get_env_bool("AGENTREPLAY_STRICT", False),
            batch_size=batch_size if batch_size is not None else _get_env_int("AGENTREPLAY_BATCH_SIZE", 100),
            flush_interval=flush_interval if flush_interval is not None else _get_env_float("AGENTREPLAY_FLUSH_INTERVAL", 5.0),
            max_queue_size=max_queue_size if max_queue_size is not None else _get_env_int("AGENTREPLAY_MAX_QUEUE_SIZE", 10000),
            timeout=timeout if timeout is not None else _get_env_float("AGENTREPLAY_TIMEOUT", 30.0),
            capture_input=capture_input if capture_input is not None else _get_env_bool("AGENTREPLAY_CAPTURE_INPUT", True),
            capture_output=capture_output if capture_output is not None else _get_env_bool("AGENTREPLAY_CAPTURE_OUTPUT", True),
            redact_patterns=redact_patterns or [],
        )

        # Validate in strict mode
        if config.strict and not config.api_key:
            raise ValueError(
                "Agentreplay: API key required in strict mode. "
                "Set AGENTREPLAY_API_KEY or pass api_key parameter."
            )

        if config.debug:
            # NOTE(review): basicConfig() configures the *root* logger of the
            # host application; kept because debug mode relied on it.
            logging.basicConfig(level=logging.DEBUG)
            logger.setLevel(logging.DEBUG)

            if not config.api_key:
                masked_key = "not set"
            elif len(config.api_key) > 4:
                masked_key = "***" + config.api_key[-4:]
            else:
                # Showing the "last four" of a key this short would reveal
                # all of it.
                masked_key = "***"

            logger.info("[Agentreplay] Initializing SDK")
            logger.info(f" base_url: {config.base_url}")
            logger.info(f" tenant_id: {config.tenant_id}")
            logger.info(f" project_id: {config.project_id}")
            logger.info(f" environment: {config.environment}")
            logger.info(f" api_key: {masked_key}")

        # Warn if no API key (strict mode has already raised above)
        if not config.api_key:
            logger.warning(
                "[Agentreplay] No API key configured. "
                "Set AGENTREPLAY_API_KEY or pass api_key parameter."
            )

        # Create clients if enabled
        client = None
        batching_client = None
        if config.enabled:
            from agentreplay.client import AgentreplayClient
            from agentreplay.batching import BatchingAgentreplayClient

            # NOTE(review): config.api_key is resolved above but is not
            # forwarded to AgentreplayClient here - confirm the client obtains
            # it some other way.
            client = AgentreplayClient(
                url=config.base_url,
                tenant_id=config.tenant_id,
                project_id=config.project_id,
                agent_id=config.agent_id,
                timeout=config.timeout,
            )

            batching_client = BatchingAgentreplayClient(
                client=client,
                batch_size=config.batch_size,
                flush_interval=config.flush_interval,
                max_buffer_size=config.max_queue_size,
            )

        _register_shutdown_hooks()

        # Publish everything only once every step above has succeeded.
        _config = config
        _client = client
        _batching_client = batching_client
        _initialized = True

        if config.debug:
            logger.info("[Agentreplay] SDK initialized successfully")

        return config


def _register_shutdown_hooks() -> None:
    """Install the atexit hook and, when possible, SIGTERM/SIGINT handlers."""
    atexit.register(_atexit_handler)

    # signal.signal() may only be called from the main thread and raises
    # ValueError elsewhere. init() must stay usable from worker threads, so
    # skip signal handling there and rely on the atexit hook alone.
    if threading.current_thread() is not threading.main_thread():
        logger.debug(
            "[Agentreplay] init() called outside the main thread; "
            "signal handlers not installed"
        )
        return

    # NOTE(review): this replaces any handler the host application installed
    # earlier for these signals.
    for signum in (signal.SIGTERM, signal.SIGINT):
        try:
            signal.signal(signum, _signal_handler)
        except (ValueError, OSError) as exc:
            logger.debug(
                "[Agentreplay] Could not install handler for signal %s: %s",
                signum,
                exc,
            )
311
+
312
+
313
+ def _atexit_handler():
314
+ """Handle process exit."""
315
+ try:
316
+ shutdown(timeout=2.0)
317
+ except Exception:
318
+ pass
319
+
320
+
321
+ def _signal_handler(signum, frame):
322
+ """Handle SIGTERM/SIGINT."""
323
+ try:
324
+ shutdown(timeout=2.0)
325
+ except Exception:
326
+ pass
327
+
328
+
329
+ # =============================================================================
330
+ # Client Access
331
+ # =============================================================================
332
+
333
def get_client():
    """Return the singleton AgentreplayClient created by ``init()``.

    Returns:
        The client instance. This is ``None`` when the SDK was initialized
        with ``enabled=False``, because ``init()`` creates no client then.

    Raises:
        RuntimeError: If SDK not initialized

    Example:
        >>> from agentreplay import init, get_client
        >>> init()
        >>> client = get_client()
        >>> with client.trace() as span:
        ...     span.set_token_count(100)
    """
    if _initialized:
        return _client
    raise RuntimeError("Agentreplay SDK not initialized. Call init() first.")
353
+
354
+
355
def get_batching_client():
    """Return the singleton BatchingAgentreplayClient created by ``init()``.

    Returns:
        The batching client instance. This is ``None`` when the SDK was
        initialized with ``enabled=False``.

    Raises:
        RuntimeError: If SDK not initialized
    """
    if _initialized:
        return _batching_client
    raise RuntimeError("Agentreplay SDK not initialized. Call init() first.")
368
+
369
+
370
def get_config() -> SDKConfig:
    """Return the configuration resolved by ``init()``.

    Returns:
        SDKConfig: Current configuration

    Raises:
        RuntimeError: If SDK not initialized
    """
    if _initialized:
        return _config
    raise RuntimeError("Agentreplay SDK not initialized. Call init() first.")
383
+
384
+
385
def is_initialized() -> bool:
    """Report whether ``init()`` has completed and ``shutdown()`` has not run since.

    Returns:
        bool: True if initialized
    """
    return _initialized
392
+
393
+
394
+ # =============================================================================
395
+ # Flush & Shutdown
396
+ # =============================================================================
397
+
398
def flush(timeout: Optional[float] = None) -> int:
    """Flush all pending spans.

    Call this before serverless function exits or at the end of scripts
    to ensure all spans are sent.

    Args:
        timeout: Maximum seconds to wait. ``None`` selects
            ``flush_timeout`` from the config; an explicit ``0`` is kept
            as given.
            NOTE(review): the value is currently only reported in the debug
            log. It is not forwarded to the batching client because that
            client's ``flush()`` signature is not visible from this module.

    Returns:
        int: The value returned by the batching client's ``flush()``
        (presumably the number of spans flushed), or 0 when the SDK is not
        initialized or has no batching client.

    Example:
        >>> from agentreplay import init, flush
        >>> init()
        >>>
        >>> # ... your code ...
        >>>
        >>> # Flush before exit
        >>> flush(timeout=5.0)
    """
    # Snapshot the globals once: shutdown() may set them to None from another
    # thread between the check below and the flush() call.
    batching_client = _batching_client
    config = _config

    if not _initialized or batching_client is None:
        return 0

    # "timeout or default" would silently turn an explicit 0 into the default.
    if timeout is None:
        timeout = config.flush_timeout if config else 5.0

    debug = bool(config and config.debug)

    if debug:
        logger.info(f"[Agentreplay] Flushing spans (timeout={timeout}s)")

    count = batching_client.flush()

    if debug:
        logger.info(f"[Agentreplay] Flushed {count} spans")

    return count
435
+
436
+
437
def shutdown(timeout: Optional[float] = None) -> None:
    """Shut the SDK down gracefully.

    Closes the batching client first, then the underlying client, and marks
    the SDK as uninitialized. Errors raised while closing are suppressed and
    only logged in debug mode. Does nothing if the SDK is not initialized.

    Args:
        timeout: Maximum seconds to wait for flush.
            NOTE(review): accepted but not used by this function at present.

    Example:
        >>> from agentreplay import init, shutdown
        >>> init()
        >>>
        >>> # ... your code ...
        >>>
        >>> # Shutdown before exit
        >>> shutdown()
    """
    global _client, _batching_client, _initialized

    if not _initialized:
        return

    def _debug_enabled():
        return _config and _config.debug

    def _close_quietly(resource, label):
        # Best-effort close: a failing close must not abort the shutdown.
        if resource is None:
            return
        try:
            resource.close()
        except Exception as e:
            if _debug_enabled():
                logger.error(f"[Agentreplay] Error closing {label}: {e}")

    with _lock:
        if _debug_enabled():
            logger.info("[Agentreplay] Shutting down SDK")

        # Batching client goes first (its close is expected to flush pending
        # spans), followed by the underlying client.
        _close_quietly(_batching_client, "batching client")
        _batching_client = None

        _close_quietly(_client, "client")
        _client = None

        _initialized = False

        if _debug_enabled():
            logger.info("[Agentreplay] SDK shutdown complete")
485
+
486
+
487
def reset() -> None:
    """Reset SDK state (for testing).

    Runs ``shutdown()`` and then clears every module-level singleton,
    including the stored configuration, so ``init()`` starts from scratch.
    """
    global _config, _client, _batching_client, _initialized

    shutdown()

    with _lock:
        _config = _client = _batching_client = None
        _initialized = False
501
+
502
+
503
+ # =============================================================================
504
+ # Diagnostics
505
+ # =============================================================================
506
+
507
def get_stats() -> Dict[str, Any]:
    """Get SDK statistics for debugging.

    Safe to call before ``init()``; missing state is reported as ``False``.

    Returns:
        Dict with ``initialized``, ``enabled`` and ``debug``. When a batching
        client exists it also contains ``queue_size``, ``dropped_count`` and
        ``retry_queue_size``, read from that client's private attributes.

    Example:
        >>> from agentreplay import init, get_stats
        >>> init(debug=True)
        >>> sorted(get_stats())
        ['debug', 'dropped_count', 'enabled', 'initialized', 'queue_size', 'retry_queue_size']
    """
    stats = {"initialized": _initialized}
    stats["enabled"] = _config.enabled if _config else False
    stats["debug"] = _config.debug if _config else False

    if _batching_client is None:
        return stats

    # These are private attributes of the batching client, so this coupling
    # breaks if its internals are renamed.
    stats.update(
        queue_size=len(_batching_client._buffer),
        dropped_count=_batching_client._dropped_count,
        retry_queue_size=len(_batching_client._retry_queue),
    )
    return stats
533
+
534
+
535
def ping() -> Dict[str, Any]:
    """Ping the server to verify connectivity.

    Issues a GET to ``<client url>/health`` through the client's underlying
    HTTP client and measures the round-trip time. Never raises: any failure
    is reported in the returned dict.

    Returns:
        Dict with:
            success: True only for an HTTP 200 response.
            latency_ms: Elapsed milliseconds, rounded to 2 decimals (absent
                when the SDK is not initialized).
            status_code: HTTP status, when a response was received.
            error: First 200 characters of the response body for non-200
                responses, the exception text on failure, or
                "SDK not initialized".

    Example:
        >>> from agentreplay import init, ping
        >>> init()
        >>> result = ping()
        >>> if result["success"]:
        ...     print(f"Connected! Latency: {result['latency_ms']}ms")
    """
    import time

    # Snapshot once so a concurrent shutdown() cannot turn the client into
    # None between the check and the request.
    client = _client
    if not _initialized or client is None:
        return {"success": False, "error": "SDK not initialized"}

    # perf_counter() is monotonic; time.time() can jump when the system clock
    # is adjusted, giving negative or inflated latencies.
    start = time.perf_counter()
    try:
        # Try health endpoint
        response = client._client.get(f"{client.url}/health")
        latency_ms = (time.perf_counter() - start) * 1000

        if response.status_code == 200:
            return {
                "success": True,
                "latency_ms": round(latency_ms, 2),
                "status_code": response.status_code,
            }
        return {
            "success": False,
            "latency_ms": round(latency_ms, 2),
            "status_code": response.status_code,
            "error": response.text[:200],
        }
    except Exception as e:
        # Deliberately broad: ping() is a diagnostic and reports any failure
        # (connection, timeout, ...) instead of raising.
        latency_ms = (time.perf_counter() - start) * 1000
        return {
            "success": False,
            "latency_ms": round(latency_ms, 2),
            "error": str(e),
        }