agentreplay 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
agentreplay/client.py ADDED
@@ -0,0 +1,1560 @@
1
+ # Copyright 2025 Sushanth (https://github.com/sushanthpy)
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Agentreplay client for interacting with the trace engine."""
16
+
17
+ from typing import Optional, List, AsyncIterator, Callable, Dict, Any
18
+
19
+ import httpx
20
+ from agentreplay.models import AgentFlowEdge, QueryFilter, QueryResponse, SpanType
21
+ from agentreplay.span import Span
22
+ from agentreplay.genai import GenAIAttributes, calculate_cost
23
+
24
+
25
+ class AgentreplayClient:
26
+ """Client for Agentreplay agent trace engine.
27
+
28
+ Provides both low-level API for direct edge manipulation and
29
+ high-level context managers for convenient span tracking.
30
+
31
+ Args:
32
+ url: Base URL of Agentreplay server
33
+ tenant_id: Tenant identifier
34
+ project_id: Project identifier (default: 0)
35
+ agent_id: Default agent identifier (default: 1)
36
+ timeout: Request timeout in seconds (default: 30)
37
+
38
+ Example:
39
+ >>> client = AgentreplayClient(
40
+ ... url="http://localhost:8080",
41
+ ... tenant_id=1,
42
+ ... project_id=0
43
+ ... )
44
+ >>>
45
+ >>> # High-level API with context managers
46
+ >>> with client.trace(span_type=SpanType.ROOT) as root:
47
+ ... with root.child(SpanType.PLANNING) as planning:
48
+ ... planning.set_token_count(50)
49
+ ...
50
+ >>> # Low-level API
51
+ >>> edge = AgentFlowEdge(
52
+ ... tenant_id=1,
53
+ ... agent_id=1,
54
+ ... session_id=42,
55
+ ... span_type=SpanType.ROOT
56
+ ... )
57
+ >>> client.insert(edge)
58
+ """
59
+
60
+ def __init__(
61
+ self,
62
+ url: str,
63
+ tenant_id: int,
64
+ project_id: int = 0,
65
+ agent_id: int = 1,
66
+ timeout: float = 30.0,
67
+ ):
68
+ """Initialize Agentreplay client."""
69
+ self.url = url.rstrip("/")
70
+ self.tenant_id = tenant_id
71
+ self.project_id = project_id
72
+ self.agent_id = agent_id
73
+ self.timeout = timeout
74
+ # Configure connection pooling so requests reuse TCP connections.
75
+ # Without pooling, every request pays full TCP setup cost (SYN/ACK overhead),
76
+ # which severely limits throughput for high-volume workloads.
77
+ self._client = httpx.Client(
78
+ timeout=timeout,
79
+ limits=httpx.Limits(
80
+ max_connections=100, # Total concurrent connections
81
+ max_keepalive_connections=50, # Pooled idle connections
82
+ keepalive_expiry=30.0, # Keep connections alive for 30s
83
+ ),
84
+ http2=False, # HTTP/2 not needed for this use case, stick with HTTP/1.1
85
+ )
86
+ self._session_counter = 0
87
+
88
+ def __enter__(self) -> "AgentreplayClient":
89
+ """Context manager entry."""
90
+ return self
91
+
92
+ def __exit__(self, exc_type, exc_val, exc_tb) -> None:
93
+ """Context manager exit."""
94
+ self.close()
95
+
96
+ def close(self) -> None:
97
+ """Close the HTTP client."""
98
+ self._client.close()
99
+
100
+ def _next_session_id(self) -> int:
101
+ """Generate next session ID."""
102
+ self._session_counter += 1
103
+ return self._session_counter
104
+
105
+ # High-level API
106
+
107
+ def trace(
108
+ self,
109
+ span_type: SpanType = SpanType.ROOT,
110
+ agent_id: Optional[int] = None,
111
+ session_id: Optional[int] = None,
112
+ ) -> Span:
113
+ """Create a new trace span.
114
+
115
+ Args:
116
+ span_type: Type of span (default: ROOT)
117
+ agent_id: Agent identifier (uses default if not provided)
118
+ session_id: Session identifier (auto-generated if not provided)
119
+
120
+ Returns:
121
+ Span context manager
122
+
123
+ Example:
124
+ >>> with client.trace() as root:
125
+ ... root.set_token_count(100)
126
+ ... with root.child(SpanType.PLANNING) as planning:
127
+ ... planning.set_confidence(0.95)
128
+ """
129
+ return Span(
130
+ client=self,
131
+ span_type=span_type,
132
+ tenant_id=self.tenant_id,
133
+ project_id=self.project_id,
134
+ agent_id=agent_id or self.agent_id,
135
+ session_id=session_id or self._next_session_id(),
136
+ parent_id=0,
137
+ )
138
+
139
+ def track_llm_call(
140
+ self,
141
+ genai_attrs: GenAIAttributes,
142
+ span_type: SpanType = SpanType.TOOL_CALL,
143
+ agent_id: Optional[int] = None,
144
+ session_id: Optional[int] = None,
145
+ parent_id: int = 0,
146
+ ) -> AgentFlowEdge:
147
+ """Track an LLM API call with OpenTelemetry GenAI attributes.
148
+
149
+ This method records LLM calls with full OpenTelemetry GenAI
150
+ semantic conventions support, enabling accurate cost calculation
151
+ and comprehensive observability.
152
+
153
+ Args:
154
+ genai_attrs: OpenTelemetry GenAI attributes from response
155
+ span_type: Type of span (default: TOOL_CALL for LLM calls)
156
+ agent_id: Agent identifier (uses default if not provided)
157
+ session_id: Session identifier (auto-generated if not provided)
158
+ parent_id: Parent edge ID (default: 0 for root)
159
+
160
+ Returns:
161
+ Inserted edge with server-assigned edge_id
162
+
163
+ Example:
164
+ >>> from agentreplay.genai import GenAIAttributes
165
+ >>>
166
+ >>> # For OpenAI
167
+ >>> response = openai.chat.completions.create(
168
+ ... model="gpt-4o",
169
+ ... messages=[{"role": "user", "content": "Hello"}],
170
+ ... temperature=0.7
171
+ ... )
172
+ >>> genai_attrs = GenAIAttributes.from_openai_response(
173
+ ... response,
174
+ ... request_params={"model": "gpt-4o", "temperature": 0.7, "messages": [...]}
175
+ ... )
176
+ >>> client.track_llm_call(genai_attrs)
177
+
178
+ >>> # For Anthropic
179
+ >>> response = anthropic.messages.create(
180
+ ... model="claude-3-5-sonnet-20241022",
181
+ ... messages=[{"role": "user", "content": "Hello"}]
182
+ ... )
183
+ >>> genai_attrs = GenAIAttributes.from_anthropic_response(response, request_params={...})
184
+ >>> client.track_llm_call(genai_attrs)
185
+ """
186
+ # Convert GenAI attributes to Agentreplay span format
187
+ import time
188
+
189
+ # Timestamps must be in microseconds since the Unix epoch
190
+ start_time_us = int(time.time() * 1_000_000)
191
+
192
+ # Validate timestamp (must be >= 2020-01-01 in microseconds)
193
+ MIN_VALID_TIMESTAMP = 1_577_836_800_000_000 # 2020-01-01 00:00:00 UTC
194
+ if start_time_us < MIN_VALID_TIMESTAMP:
195
+ import warnings
196
+ warnings.warn(
197
+ f"WARNING: Timestamp {start_time_us} is too old/small! "
198
+ f"Check microsecond conversion. Expected >= {MIN_VALID_TIMESTAMP}."
199
+ )
200
+
201
+ span = {
202
+ "span_id": "0", # Server will assign
203
+ "trace_id": str(session_id or self._next_session_id()),
204
+ "parent_span_id": str(parent_id) if parent_id > 0 else None,
205
+ "name": f"{genai_attrs.system}:{genai_attrs.operation_name}" if genai_attrs.system and genai_attrs.operation_name else "llm_call",
206
+ "start_time": start_time_us,
207
+ "end_time": None,
208
+ "attributes": {
209
+ # Agentreplay metadata
210
+ "tenant_id": str(self.tenant_id),
211
+ "project_id": str(self.project_id),
212
+ "agent_id": str(agent_id or self.agent_id),
213
+ "span_type": str(span_type),
214
+
215
+ # Backward compatibility: total tokens
216
+ "token_count": str(genai_attrs.total_tokens or 0),
217
+
218
+ # OpenTelemetry GenAI standard attributes
219
+ **genai_attrs.to_attributes_dict(),
220
+ }
221
+ }
222
+
223
+ response = self._client.post(
224
+ f"{self.url}/api/v1/traces",
225
+ json={"spans": [span]},
226
+ )
227
+ response.raise_for_status()
228
+
229
+ # Create edge object to return
230
+ edge = AgentFlowEdge(
231
+ edge_id=0, # Server assigns
232
+ tenant_id=self.tenant_id,
233
+ project_id=self.project_id,
234
+ agent_id=agent_id or self.agent_id,
235
+ session_id=session_id or self._session_counter,
236
+ causal_parent=parent_id,
237
+ span_type=span_type,
238
+ timestamp_us=span["start_time"],
239
+ token_count=genai_attrs.total_tokens or 0,
240
+ )
241
+
242
+ return edge
243
+
244
+ def track_openai_call(
245
+ self,
246
+ response: Any,
247
+ request_params: Optional[Dict[str, Any]] = None,
248
+ span_type: SpanType = SpanType.TOOL_CALL,
249
+ agent_id: Optional[int] = None,
250
+ session_id: Optional[int] = None,
251
+ parent_id: int = 0,
252
+ ) -> AgentFlowEdge:
253
+ """Track an OpenAI API call.
254
+
255
+ Convenience method that extracts GenAI attributes and tracks the call.
256
+
257
+ Args:
258
+ response: OpenAI API response object
259
+ request_params: Request parameters (model, temperature, messages, etc.)
260
+ span_type: Type of span (default: TOOL_CALL for LLM)
261
+ agent_id: Agent identifier
262
+ session_id: Session identifier
263
+ parent_id: Parent edge ID
264
+
265
+ Returns:
266
+ Inserted edge
267
+
268
+ Example:
269
+ >>> response = openai.chat.completions.create(
270
+ ... model="gpt-4o",
271
+ ... messages=[{"role": "user", "content": "Hello"}],
272
+ ... temperature=0.7
273
+ ... )
274
+ >>> client.track_openai_call(
275
+ ... response,
276
+ ... request_params={"model": "gpt-4o", "temperature": 0.7, "messages": [...]}
277
+ ... )
278
+ """
279
+ genai_attrs = GenAIAttributes.from_openai_response(response, request_params)
280
+ return self.track_llm_call(genai_attrs, span_type, agent_id, session_id, parent_id)
281
+
282
+ def track_anthropic_call(
283
+ self,
284
+ response: Any,
285
+ request_params: Optional[Dict[str, Any]] = None,
286
+ span_type: SpanType = SpanType.TOOL_CALL,
287
+ agent_id: Optional[int] = None,
288
+ session_id: Optional[int] = None,
289
+ parent_id: int = 0,
290
+ ) -> AgentFlowEdge:
291
+ """Track an Anthropic API call.
292
+
293
+ Convenience method that extracts GenAI attributes and tracks the call.
294
+
295
+ Args:
296
+ response: Anthropic API response object
297
+ request_params: Request parameters (model, messages, etc.)
298
+ span_type: Type of span (default: TOOL_CALL for LLM)
299
+ agent_id: Agent identifier
300
+ session_id: Session identifier
301
+ parent_id: Parent edge ID
302
+
303
+ Returns:
304
+ Inserted edge
305
+
306
+ Example:
307
+ >>> response = anthropic.messages.create(
308
+ ... model="claude-3-5-sonnet-20241022",
309
+ ... messages=[{"role": "user", "content": "Hello"}]
310
+ ... )
311
+ >>> client.track_anthropic_call(
312
+ ... response,
313
+ ... request_params={"model": "claude-3-5-sonnet-20241022", "messages": [...]}
314
+ ... )
315
+ """
316
+ genai_attrs = GenAIAttributes.from_anthropic_response(response, request_params)
317
+ return self.track_llm_call(genai_attrs, span_type, agent_id, session_id, parent_id)
318
+
319
+ # Low-level API
320
+
321
+ def insert(self, edge: AgentFlowEdge) -> AgentFlowEdge:
322
+ """Insert a single edge.
323
+
324
+ Args:
325
+ edge: Edge to insert
326
+
327
+ Returns:
328
+ Inserted edge with assigned edge_id
329
+
330
+ Raises:
331
+ httpx.HTTPError: If request fails
332
+ """
333
+ # Generate edge_id if not set: timestamp (high bits), session_id (middle), random low bits
334
+ # Keeping the random bits out of the session_id field avoids mangling it and colliding IDs
335
+ if not edge.edge_id or edge.edge_id == 0:
336
+ import random
337
+ edge.edge_id = (edge.timestamp_us << 32) | ((edge.session_id & 0x3FFFFF) << 10) | random.randint(0, 0x3FF)
338
+
339
+ # Calculate end_time if duration is set
340
+ end_time = edge.timestamp_us + edge.duration_us if edge.duration_us > 0 else None
341
+
342
+ # Convert edge to AgentreplaySpan format expected by server
343
+ span = {
344
+ "span_id": str(edge.edge_id),
345
+ "trace_id": str(edge.session_id),
346
+ "parent_span_id": str(edge.causal_parent) if edge.causal_parent else None,
347
+ "name": edge.span_type.name if hasattr(edge.span_type, 'name') else f"span_{edge.span_type}",
348
+ "start_time": edge.timestamp_us,
349
+ "end_time": end_time,
350
+ "attributes": {
351
+ "tenant_id": str(edge.tenant_id),
352
+ "project_id": str(edge.project_id),
353
+ "agent_id": str(edge.agent_id),
354
+ "session_id": str(edge.session_id),
355
+ "span_type": str(edge.span_type),
356
+ "token_count": str(edge.token_count),
357
+ "duration_us": str(edge.duration_us),
358
+ }
359
+ }
360
+
361
+ response = self._client.post(
362
+ f"{self.url}/api/v1/traces",
363
+ json={"spans": [span]},
364
+ )
365
+ response.raise_for_status()
366
+
367
+ return edge
368
+
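A minimal sketch of the low-level insert path, assuming a server at http://localhost:8080 and relying on the field defaults shown in the class docstring:

    import time
    from agentreplay.client import AgentreplayClient
    from agentreplay.models import AgentFlowEdge, SpanType

    with AgentreplayClient(url="http://localhost:8080", tenant_id=1) as client:
        edge = AgentFlowEdge(
            tenant_id=1,
            agent_id=1,
            session_id=42,
            span_type=SpanType.ROOT,
        )
        inserted = client.insert(edge)
        # edge_id is generated client-side when unset (see above)
        print(hex(inserted.edge_id))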
369
+ def insert_batch(self, edges: List[AgentFlowEdge]) -> List[AgentFlowEdge]:
370
+ """Insert multiple edges in a batch.
371
+
372
+ Batching amortizes per-request overhead and is substantially faster than individual inserts.
373
+
374
+ Args:
375
+ edges: List of edges to insert
376
+
377
+ Returns:
378
+ List of inserted edges with server-assigned edge_ids
379
+
380
+ Raises:
381
+ httpx.HTTPError: If request fails
382
+ """
383
+ response = self._client.post(
384
+ f"{self.url}/api/v1/traces",
385
+ json=[e.model_dump() for e in edges],
386
+ )
387
+ response.raise_for_status()
388
+ return [AgentFlowEdge(**e) for e in response.json()]
389
+
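For bulk ingestion, a sketch that builds edges and submits them in one request (field defaults as in the class docstring; the exact response payload depends on the server):

    edges = [
        AgentFlowEdge(tenant_id=1, agent_id=1, session_id=42, span_type=SpanType.TOOL_CALL)
        for _ in range(100)
    ]
    inserted = client.insert_batch(edges)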
390
+ def submit_feedback(self, trace_id: str, feedback: int) -> dict:
391
+ """Submit user feedback for a trace.
392
+
393
+ Captures user satisfaction signals (thumbs up/down) for building
394
+ evaluation datasets from production failures.
395
+
396
+ Args:
397
+ trace_id: Trace/edge identifier (hex string)
398
+ feedback: -1 (thumbs down), 0 (neutral), 1 (thumbs up)
399
+
400
+ Returns:
401
+ Response dict with status
402
+
403
+ Raises:
404
+ httpx.HTTPError: If request fails
405
+ ValueError: If feedback not in {-1, 0, 1}
406
+
407
+ Example:
408
+ >>> client.submit_feedback("1730000000000000", feedback=1)
409
+ {'success': True, 'message': 'Feedback recorded'}
410
+ """
411
+ if feedback not in {-1, 0, 1}:
412
+ raise ValueError(f"Feedback must be -1, 0, or 1, got {feedback}")
413
+
414
+ response = self._client.post(
415
+ f"{self.url}/api/v1/traces/{trace_id}/feedback",
416
+ json={"feedback": feedback},
417
+ )
418
+ response.raise_for_status()
419
+ return response.json()
420
+
421
+ def add_to_dataset(
422
+ self, trace_id: str, dataset_name: str, input_data: Optional[dict] = None, output_data: Optional[dict] = None
423
+ ) -> dict:
424
+ """Add a trace to an evaluation dataset.
425
+
426
+ Enables production→evaluation feedback loop where bad responses
427
+ get converted into test cases.
428
+
429
+ Args:
430
+ trace_id: Trace/edge identifier (hex string)
431
+ dataset_name: Name of dataset to add to
432
+ input_data: Optional input data to store with trace
433
+ output_data: Optional output data to store with trace
434
+
435
+ Returns:
436
+ Response dict with status
437
+
438
+ Raises:
439
+ httpx.HTTPError: If request fails
440
+
441
+ Example:
442
+ >>> client.add_to_dataset(
443
+ ... "1730000000000000",
444
+ ... "bad_responses",
445
+ ... input_data={"prompt": "Hello"},
446
+ ... output_data={"response": "..."}
447
+ ... )
448
+ {'success': True, 'dataset_name': 'bad_responses'}
449
+ """
450
+ payload = {"trace_id": trace_id}
451
+ if input_data:
452
+ payload["input"] = input_data
453
+ if output_data:
454
+ payload["output"] = output_data
455
+
456
+ response = self._client.post(
457
+ f"{self.url}/api/v1/datasets/{dataset_name}/add",
458
+ json=payload,
459
+ )
460
+ response.raise_for_status()
461
+ return response.json()
462
+
463
+ def get(self, edge_id: int) -> Optional[AgentFlowEdge]:
464
+ """Get an edge by ID.
465
+
466
+ Args:
467
+ edge_id: Edge identifier
468
+
469
+ Returns:
470
+ Edge if found, None otherwise
471
+
472
+ Raises:
473
+ httpx.HTTPError: If request fails
474
+ """
475
+ response = self._client.get(f"{self.url}/api/v1/edges/{edge_id}")
476
+ if response.status_code == 404:
477
+ return None
478
+ response.raise_for_status()
479
+ return AgentFlowEdge(**response.json())
480
+
481
+ def query_temporal_range(
482
+ self,
483
+ start_timestamp_us: int,
484
+ end_timestamp_us: int,
485
+ filter: Optional[QueryFilter] = None,
486
+ ) -> QueryResponse:
487
+ """Query edges in a temporal range.
488
+
489
+ Args:
490
+ start_timestamp_us: Start timestamp (microseconds since epoch)
491
+ end_timestamp_us: End timestamp (microseconds since epoch)
492
+ filter: Optional query filters
493
+
494
+ Returns:
495
+ Query response with matching edges
496
+
497
+ Raises:
498
+ httpx.HTTPError: If request fails
499
+ """
500
+ params = {
501
+ "start_ts": start_timestamp_us,
502
+ "end_ts": end_timestamp_us,
503
+ }
504
+ if filter:
505
+ filter_dict = filter.model_dump(exclude_none=True)
506
+ if "session_id" in filter_dict:
507
+ params["session_id"] = filter_dict["session_id"]
508
+ if "agent_id" in filter_dict:
509
+ params["agent_id"] = filter_dict["agent_id"]
510
+ if "environment" in filter_dict:
511
+ params["environment"] = filter_dict["environment"]
512
+ if "exclude_pii" in filter_dict:
513
+ params["exclude_pii"] = filter_dict["exclude_pii"]
514
+
515
+ response = self._client.get(
516
+ f"{self.url}/api/v1/traces",
517
+ params=params,
518
+ )
519
+ response.raise_for_status()
520
+ result = response.json()
521
+
522
+ # Handle direct list response
523
+ if isinstance(result, list):
524
+ return QueryResponse(
525
+ edges=[AgentFlowEdge(**e) for e in result],
526
+ total_count=len(result)
527
+ )
528
+
529
+ # Handle server's TracesResponse format {traces, total, limit, offset}
530
+ # Map TraceView format to AgentFlowEdge format
531
+ if isinstance(result, dict) and 'traces' in result:
532
+ edges = []
533
+ for trace in result['traces']:
534
+ # Map TraceView fields to AgentFlowEdge fields
535
+ edge_data = {
536
+ 'edge_id': int(trace.get('span_id', '0x0'), 16) if isinstance(trace.get('span_id'), str) else trace.get('span_id', 0),
537
+ 'causal_parent': int(trace.get('parent_span_id', '0x0'), 16) if trace.get('parent_span_id') and isinstance(trace.get('parent_span_id'), str) else 0,
538
+ 'timestamp_us': trace.get('timestamp_us', 0),
539
+ 'tenant_id': trace.get('tenant_id', self.tenant_id),
540
+ 'project_id': trace.get('project_id', self.project_id),
541
+ 'agent_id': trace.get('agent_id', self.agent_id),
542
+ 'session_id': trace.get('session_id', 0),
543
+ 'span_type': trace.get('span_type', 0),
544
+ 'duration_us': trace.get('duration_us', 0),
545
+ 'token_count': trace.get('token_count', 0),
546
+ 'sensitivity_flags': trace.get('sensitivity_flags', 0),
547
+ }
548
+ edges.append(AgentFlowEdge(**edge_data))
549
+
550
+ return QueryResponse(
551
+ edges=edges,
552
+ total_count=result.get('total', 0)
553
+ )
554
+
555
+ # Handle structured response (legacy format with edges field)
556
+ return QueryResponse(**result)
557
+
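A usage sketch querying the last hour of traces; `edges` and `total_count` are the QueryResponse fields used above:

    import time

    end_us = int(time.time() * 1_000_000)
    start_us = end_us - 3600 * 1_000_000  # one hour ago, in microseconds
    resp = client.query_temporal_range(start_us, end_us)
    print(resp.total_count)
    for edge in resp.edges:
        print(edge.session_id, edge.span_type, edge.token_count)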
558
+ # Causal queries
559
+
560
+ def get_children(self, edge_id: int) -> List[AgentFlowEdge]:
561
+ """Get direct children of an edge.
562
+
563
+ Args:
564
+ edge_id: Parent edge identifier
565
+
566
+ Returns:
567
+ List of child edges
568
+
569
+ Raises:
570
+ httpx.HTTPError: If request fails
571
+ """
572
+ response = self._client.get(f"{self.url}/api/v1/edges/{edge_id}/children")
573
+ response.raise_for_status()
574
+ return [AgentFlowEdge(**e) for e in response.json()]
575
+
576
+ def get_ancestors(self, edge_id: int) -> List[AgentFlowEdge]:
577
+ """Get all ancestors of an edge (path to root).
578
+
579
+ Args:
580
+ edge_id: Edge identifier
581
+
582
+ Returns:
583
+ List of ancestor edges (root to immediate parent)
584
+
585
+ Raises:
586
+ httpx.HTTPError: If request fails
587
+ """
588
+ response = self._client.get(f"{self.url}/api/v1/edges/{edge_id}/ancestors")
589
+ response.raise_for_status()
590
+ return [AgentFlowEdge(**e) for e in response.json()]
591
+
592
+ def get_descendants(self, edge_id: int) -> List[AgentFlowEdge]:
593
+ """Get all descendants of an edge (entire subtree).
594
+
595
+ Args:
596
+ edge_id: Edge identifier
597
+
598
+ Returns:
599
+ List of descendant edges
600
+
601
+ Raises:
602
+ httpx.HTTPError: If request fails
603
+ """
604
+ response = self._client.get(f"{self.url}/api/v1/edges/{edge_id}/descendants")
605
+ response.raise_for_status()
606
+ return [AgentFlowEdge(**e) for e in response.json()]
607
+
608
+ def get_path(self, from_edge_id: int, to_edge_id: int) -> List[AgentFlowEdge]:
609
+ """Get path between two edges in the causal graph.
610
+
611
+ Args:
612
+ from_edge_id: Start edge identifier
613
+ to_edge_id: End edge identifier
614
+
615
+ Returns:
616
+ List of edges forming the path (empty if no path exists)
617
+
618
+ Raises:
619
+ httpx.HTTPError: If request fails
620
+ """
621
+ response = self._client.get(
622
+ f"{self.url}/api/v1/edges/{from_edge_id}/path/{to_edge_id}"
623
+ )
624
+ response.raise_for_status()
625
+ return [AgentFlowEdge(**e) for e in response.json()]
626
+
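A sketch of walking the causal graph with the traversal endpoints above; `root_edge_id` is a placeholder for an edge_id obtained from an earlier insert:

    for child in client.get_children(root_edge_id):
        depth = len(client.get_ancestors(child.edge_id))  # ancestors run root -> parent
        print(child.edge_id, "depth:", depth)

    subtree = client.get_descendants(root_edge_id)  # entire subtree under the root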
627
+ # Session queries
628
+
629
+ def filter_by_session(
630
+ self, session_id: int, start_timestamp_us: int = 0, end_timestamp_us: int = 0
631
+ ) -> List[AgentFlowEdge]:
632
+ """Get all edges in a session.
633
+
634
+ Args:
635
+ session_id: Session identifier
636
+ start_timestamp_us: Optional start timestamp filter
637
+ end_timestamp_us: Optional end timestamp filter (0 = now)
638
+
639
+ Returns:
640
+ List of edges in the session
641
+
642
+ Raises:
643
+ httpx.HTTPError: If request fails
644
+ """
645
+ if end_timestamp_us == 0:
646
+ import time
647
+
648
+ end_timestamp_us = int(time.time() * 1_000_000)
649
+
650
+ filter = QueryFilter(session_id=session_id, tenant_id=self.tenant_id, project_id=self.project_id)
651
+ response = self.query_temporal_range(start_timestamp_us, end_timestamp_us, filter)
652
+ return response.edges
653
+
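For example, summing token usage across a session (the end timestamp defaults to now):

    edges = client.filter_by_session(session_id=42)
    total_tokens = sum(e.token_count for e in edges)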
654
+ # Backward compatibility methods for old examples
655
+
656
+ def create_trace(
657
+ self,
658
+ agent_id: int,
659
+ session_id: int,
660
+ span_type: SpanType,
661
+ parent_id=None, # Can be int or hex string
662
+ metadata: dict = None
663
+ ) -> dict:
664
+ """Create a new trace (backward compatibility method).
665
+
666
+ Args:
667
+ agent_id: Agent identifier
668
+ session_id: Session identifier
669
+ span_type: Type of span
670
+ parent_id: Optional parent edge ID (can be int or hex string)
671
+ metadata: Optional metadata dict (will be stored as attributes)
672
+
673
+ Returns:
674
+ Dict with trace information including edge_id (as hex string)
675
+ """
676
+ import time
677
+
678
+ # Generate edge ID and timestamps
679
+ edge_id = self._generate_edge_id() # Now returns hex string
680
+ start_time_us = int(time.time() * 1_000_000)
681
+
682
+ # Normalize parent_id to hex string if provided
683
+ parent_span_id = None
684
+ if parent_id is not None:
685
+ if isinstance(parent_id, int):
686
+ parent_span_id = hex(parent_id)[2:]
687
+ else:
688
+ parent_span_id = str(parent_id)
689
+
690
+ # Create span with attributes
691
+ span = {
692
+ "span_id": edge_id,
693
+ "trace_id": str(session_id),
694
+ "parent_span_id": parent_span_id,
695
+ "name": metadata.get("name", f"span_{agent_id}") if metadata else f"span_{agent_id}",
696
+ "start_time": start_time_us,
697
+ "end_time": start_time_us, # Will be updated later
698
+ "attributes": {
699
+ "tenant_id": str(self.tenant_id),
700
+ "project_id": str(self.project_id),
701
+ "agent_id": str(agent_id),
702
+ "session_id": str(session_id),
703
+ "span_type": str(span_type.value if hasattr(span_type, 'value') else span_type),
704
+ "token_count": "0",
705
+ "duration_us": "0",
706
+ }
707
+ }
708
+
709
+ # Add metadata as attributes
710
+ if metadata:
711
+ for key, value in metadata.items():
712
+ # Skip 'name' as it's already used
713
+ if key == "name":
714
+ continue
715
+ # Convert nested dicts/lists to JSON strings
716
+ if isinstance(value, (dict, list)):
717
+ import json
718
+ value_str = json.dumps(value)
719
+ else:
720
+ value_str = str(value)
721
+ span["attributes"][key] = value_str
722
+
723
+ try:
724
+ response = self._client.post(
725
+ f"{self.url}/api/v1/traces",
726
+ json={"spans": [span]},
727
+ )
728
+ response.raise_for_status()
729
+ except Exception as e:
730
+ import warnings
731
+ warnings.warn(f"Failed to create trace: {e}")
732
+
733
+ return {
734
+ "edge_id": edge_id,
735
+ "tenant_id": self.tenant_id,
736
+ "agent_id": agent_id,
737
+ "session_id": session_id,
738
+ "span_type": span_type.name if hasattr(span_type, 'name') else str(span_type),
739
+ "metadata": metadata or {},
740
+ }
741
+
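A sketch of the backward-compatible flow; the returned hex edge_id can be threaded through as parent_id for child spans:

    info = client.create_trace(
        agent_id=1,
        session_id=42,
        span_type=SpanType.ROOT,
        metadata={"name": "checkout-flow"},
    )
    parent = info["edge_id"]  # hex string, usable as parent_id below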
742
+ def _generate_edge_id(self) -> str:
743
+ """Generate a unique edge ID as hex string."""
744
+ import time
745
+ import random
746
+ # Use timestamp + random bits for uniqueness
747
+ timestamp = int(time.time() * 1000) & 0xFFFFFFFFFFFF # 48 bits
748
+ random_bits = random.randint(0, 0xFFFF) # 16 bits
749
+ edge_id = (timestamp << 16) | random_bits
750
+ return hex(edge_id)[2:] # Remove '0x' prefix
751
+
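Decoding one of these IDs recovers the packed fields, per the layout above:

    edge_id = int(client._generate_edge_id(), 16)
    timestamp_ms = edge_id >> 16    # 48-bit millisecond timestamp
    random_bits = edge_id & 0xFFFF  # 16 random low bits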
752
+ def create_genai_trace(
753
+ self,
754
+ agent_id: int,
755
+ session_id: int,
756
+ input_messages: list = None,
757
+ output: dict = None,
758
+ model: str = None,
759
+ model_parameters: dict = None,
760
+ input_usage: int = None,
761
+ output_usage: int = None,
762
+ total_usage: int = None,
763
+ parent_id: str = None,
764
+ metadata: dict = None,
765
+ operation_name: str = "chat",
766
+ finish_reason: str = None,
767
+ system: str = None,
768
+ user_id: str = None,
769
+ user_name: str = None,
770
+ conversation_id: str = None,
771
+ environment: str = None
772
+ ) -> dict:
773
+ """Create a GenAI trace with full OTEL semantic conventions.
774
+
775
+ Follows OpenTelemetry GenAI specification:
776
+ https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/
777
+ https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-events/
778
+
779
+ Args:
780
+ agent_id: Agent identifier
781
+ session_id: Session identifier
782
+ input_messages: List of input messages with role/content (OTEL events format)
783
+ output: Output object with role/content (OTEL event format)
784
+ model: Model name (REQUIRED by OTEL spec)
785
+ model_parameters: Model parameters dict (temperature, top_p, max_tokens, etc.)
786
+ input_usage: Prompt tokens (gen_ai.usage.prompt_tokens)
787
+ output_usage: Completion tokens (gen_ai.usage.completion_tokens)
788
+ total_usage: Total tokens (gen_ai.usage.total_tokens)
789
+ parent_id: Optional parent span ID
790
+ metadata: Additional metadata (user.*, session.*, deployment.*)
791
+ operation_name: Operation name ("chat", "completion", "embedding")
792
+ finish_reason: Completion finish reason ("stop", "length", "tool_calls")
793
+ system: Provider system ("openai", "anthropic", "meta") - auto-detected if not provided
794
+ user_id: User identifier (user.id)
795
+ user_name: User name (user.name)
796
+ conversation_id: Conversation identifier (conversation.id)
797
+ environment: Deployment environment (deployment.environment)
798
+
799
+ Returns:
800
+ Dict with trace information including edge_id
801
+ """
802
+ import time
803
+ import json
804
+
805
+ edge_id = self._generate_edge_id()
806
+ start_time_us = int(time.time() * 1_000_000)
807
+
808
+ # Normalize parent_id
809
+ parent_span_id = None
810
+ if parent_id is not None:
811
+ if isinstance(parent_id, int):
812
+ parent_span_id = hex(parent_id)[2:]
813
+ else:
814
+ parent_span_id = str(parent_id)
815
+
816
+ # Build OTEL GenAI attributes (REQUIRED by spec)
817
+ attributes = {
818
+ "tenant_id": str(self.tenant_id),
819
+ "project_id": str(self.project_id),
820
+ "agent_id": str(agent_id),
821
+ "session_id": str(session_id),
822
+ "span_type": "0", # AGENT/GENERATION
823
+ "gen_ai.operation.name": operation_name, # RECOMMENDED
824
+ }
825
+
826
+ # REQUIRED: gen_ai.system and gen_ai.request.model
827
+ if system:
828
+ attributes["gen_ai.system"] = system
829
+ elif model:
830
+ # Auto-detect system from model name
831
+ model_lower = model.lower()
832
+ if "gpt" in model_lower or "openai" in model_lower:
833
+ attributes["gen_ai.system"] = "openai"
834
+ elif "claude" in model_lower or "anthropic" in model_lower:
835
+ attributes["gen_ai.system"] = "anthropic"
836
+ elif "llama" in model_lower or "meta" in model_lower:
837
+ attributes["gen_ai.system"] = "meta"
838
+ elif "gemini" in model_lower or "palm" in model_lower:
839
+ attributes["gen_ai.system"] = "google"
840
+ else:
841
+ attributes["gen_ai.system"] = "unknown"
842
+
843
+ if model:
844
+ attributes["gen_ai.request.model"] = model # REQUIRED
845
+ attributes["gen_ai.response.model"] = model
846
+
847
+ # Add model parameters (OPTIONAL but recommended)
848
+ if model_parameters:
849
+ for key, value in model_parameters.items():
850
+ param_key = f"gen_ai.request.{key}"
851
+ attributes[param_key] = str(value)
852
+
853
+ # RECOMMENDED: Token usage (gen_ai.usage.*)
854
+ if input_usage is not None:
855
+ attributes["gen_ai.usage.prompt_tokens"] = str(input_usage) # OTEL spec name
856
+ attributes["gen_ai.usage.input_tokens"] = str(input_usage) # Alias
857
+ if output_usage is not None:
858
+ attributes["gen_ai.usage.completion_tokens"] = str(output_usage) # OTEL spec name
859
+ attributes["gen_ai.usage.output_tokens"] = str(output_usage) # Alias
860
+ if total_usage is not None:
861
+ attributes["gen_ai.usage.total_tokens"] = str(total_usage)
862
+ attributes["token_count"] = str(total_usage) # Legacy
863
+
864
+ # OPTIONAL: Finish reason
865
+ if finish_reason:
866
+ attributes["gen_ai.response.finish_reasons"] = json.dumps([finish_reason])
867
+
868
+ # User/Session context (custom metadata)
869
+ if user_id:
870
+ attributes["user.id"] = user_id
871
+ if user_name:
872
+ attributes["user.name"] = user_name
873
+ if conversation_id:
874
+ attributes["conversation.id"] = conversation_id
875
+ if environment:
876
+ attributes["deployment.environment"] = environment
877
+
878
+ # OTEL Events (preferred way for prompts/completions)
879
+ # Store as JSON arrays in attributes for now (backend will parse)
880
+ events = []
881
+
882
+ # gen_ai.content.prompt events
883
+ if input_messages:
884
+ for idx, msg in enumerate(input_messages):
885
+ event = {
886
+ "name": "gen_ai.content.prompt",
887
+ "timestamp": start_time_us,
888
+ "attributes": {
889
+ "gen_ai.prompt": msg.get("content", ""),
890
+ "gen_ai.content.role": msg.get("role", "user"),
891
+ "gen_ai.content.index": idx
892
+ }
893
+ }
894
+ events.append(event)
895
+ # Also store full messages for compatibility
896
+ attributes["gen_ai.prompt.messages"] = json.dumps(input_messages)
897
+
898
+ # gen_ai.content.completion event
899
+ if output:
900
+ event = {
901
+ "name": "gen_ai.content.completion",
902
+ "timestamp": start_time_us,
903
+ "attributes": {
904
+ "gen_ai.completion": output.get("content", "") if isinstance(output, dict) else str(output),
905
+ "gen_ai.content.role": output.get("role", "assistant") if isinstance(output, dict) else "assistant"
906
+ }
907
+ }
908
+ events.append(event)
909
+ # Store full output
910
+ attributes["gen_ai.completion.message"] = json.dumps(output)
911
+
912
+ # Store events as JSON array
913
+ if events:
914
+ attributes["otel.events"] = json.dumps(events)
915
+
916
+ # Add any additional metadata
917
+ if metadata:
918
+ for key, value in metadata.items():
919
+ if key not in attributes:
920
+ if isinstance(value, (dict, list)):
921
+ attributes[f"metadata.{key}"] = json.dumps(value)
922
+ else:
923
+ attributes[f"metadata.{key}"] = str(value)
924
+
925
+ # Create span
926
+ span = {
927
+ "span_id": edge_id,
928
+ "trace_id": str(session_id),
929
+ "parent_span_id": parent_span_id,
930
+ "name": f"{operation_name}-{model or 'unknown'}",
931
+ "start_time": start_time_us,
932
+ "end_time": start_time_us,
933
+ "attributes": attributes
934
+ }
935
+
936
+ try:
937
+ response = self._client.post(
938
+ f"{self.url}/api/v1/traces",
939
+ json={"spans": [span]},
940
+ )
941
+ response.raise_for_status()
942
+ except Exception as e:
943
+ import warnings
944
+ warnings.warn(f"Failed to create GenAI trace: {e}")
945
+
946
+ return {
947
+ "edge_id": edge_id,
948
+ "tenant_id": self.tenant_id,
949
+ "agent_id": agent_id,
950
+ "session_id": session_id,
951
+ "span_type": "GENERATION",
952
+ "model": model,
953
+ "input_usage": input_usage,
954
+ "output_usage": output_usage,
955
+ "total_usage": total_usage,
956
+ }
957
+
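A usage sketch with illustrative token counts; all argument names come from the signature above:

    info = client.create_genai_trace(
        agent_id=1,
        session_id=42,
        model="gpt-4o",
        input_messages=[{"role": "user", "content": "Hello"}],
        output={"role": "assistant", "content": "Hi there!"},
        input_usage=12,
        output_usage=9,
        total_usage=21,
        finish_reason="stop",
        environment="production",
    )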
958
+ def create_tool_trace(
959
+ self,
960
+ agent_id: int,
961
+ session_id: int,
962
+ tool_name: str,
963
+ tool_input: dict = None,
964
+ tool_output: dict = None,
965
+ tool_description: str = None,
966
+ tool_parameters_schema: dict = None,
967
+ parent_id: str = None,
968
+ metadata: dict = None
969
+ ) -> dict:
970
+ """Create a tool call trace following OTEL agent spans spec.
971
+
972
+ Follows: https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-agent-spans/
973
+
974
+ Args:
975
+ agent_id: Agent identifier
976
+ session_id: Session identifier
977
+ tool_name: Tool name (gen_ai.tool.name)
978
+ tool_input: Tool call input/arguments
979
+ tool_output: Tool call result/output
980
+ tool_description: Tool description
981
+ tool_parameters_schema: JSON schema for tool parameters
982
+ parent_id: Parent span ID (usually the LLM span that requested the tool)
983
+ metadata: Additional metadata
984
+
985
+ Returns:
986
+ Dict with trace information
987
+ """
988
+ import time
989
+ import json
990
+
991
+ edge_id = self._generate_edge_id()
992
+ start_time_us = int(time.time() * 1_000_000)
993
+
994
+ # Normalize parent_id
995
+ parent_span_id = None
996
+ if parent_id is not None:
997
+ if isinstance(parent_id, int):
998
+ parent_span_id = hex(parent_id)[2:]
999
+ else:
1000
+ parent_span_id = str(parent_id)
1001
+
1002
+ # Build attributes for tool span
1003
+ attributes = {
1004
+ "tenant_id": str(self.tenant_id),
1005
+ "project_id": str(self.project_id),
1006
+ "agent_id": str(agent_id),
1007
+ "session_id": str(session_id),
1008
+ "span_type": "3", # TOOL
1009
+ "gen_ai.tool.name": tool_name,
1010
+ }
1011
+
1012
+ if tool_description:
1013
+ attributes["gen_ai.tool.description"] = tool_description
1014
+
1015
+ if tool_parameters_schema:
1016
+ attributes["gen_ai.tool.parameters"] = json.dumps(tool_parameters_schema)
1017
+
1018
+ if tool_input:
1019
+ attributes["gen_ai.tool.call.input"] = json.dumps(tool_input)
1020
+
1021
+ if tool_output:
1022
+ attributes["gen_ai.tool.call.output"] = json.dumps(tool_output)
1023
+
1024
+ # Add metadata
1025
+ if metadata:
1026
+ for key, value in metadata.items():
1027
+ if key not in attributes:
1028
+ if isinstance(value, (dict, list)):
1029
+ attributes[f"metadata.{key}"] = json.dumps(value)
1030
+ else:
1031
+ attributes[f"metadata.{key}"] = str(value)
1032
+
1033
+ # Create span
1034
+ span = {
1035
+ "span_id": edge_id,
1036
+ "trace_id": str(session_id),
1037
+ "parent_span_id": parent_span_id,
1038
+ "name": f"tool-{tool_name}",
1039
+ "start_time": start_time_us,
1040
+ "end_time": start_time_us,
1041
+ "attributes": attributes
1042
+ }
1043
+
1044
+ try:
1045
+ response = self._client.post(
1046
+ f"{self.url}/api/v1/traces",
1047
+ json={"spans": [span]},
1048
+ )
1049
+ response.raise_for_status()
1050
+ except Exception as e:
1051
+ import warnings
1052
+ warnings.warn(f"Failed to create tool trace: {e}")
1053
+
1054
+ return {
1055
+ "edge_id": edge_id,
1056
+ "tenant_id": self.tenant_id,
1057
+ "agent_id": agent_id,
1058
+ "session_id": session_id,
1059
+ "span_type": "TOOL",
1060
+ "tool_name": tool_name,
1061
+ }
1062
+
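A sketch of a tool span parented to the LLM span that requested it (`info` comes from the create_genai_trace sketch above; payloads are illustrative):

    client.create_tool_trace(
        agent_id=1,
        session_id=42,
        tool_name="web_search",
        tool_input={"query": "weather in Paris"},
        tool_output={"top_result": "15C, cloudy"},
        parent_id=info["edge_id"],
    )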
1063
+ def update_trace(
1064
+ self,
1065
+ edge_id, # Can be int or hex string
1066
+ token_count: int = None,
1067
+ duration_ms: int = None,
1068
+ duration_us: int = None,
1069
+ payload: dict = None,
1070
+ metadata: dict = None,
1071
+ session_id: int = None
1072
+ ) -> None:
1073
+ """Update a trace by sending a completion span.
1074
+
1075
+ Sends an updated span with end_time, duration, and token count.
1076
+ This creates a new trace event showing the completion.
1077
+
1078
+ Args:
1079
+ edge_id: Edge identifier (int or hex string)
1080
+ token_count: Token count to set
1081
+ duration_ms: Duration in milliseconds
1082
+ duration_us: Duration in microseconds
1083
+ payload: Payload data (prompt, response, etc.)
1084
+ metadata: Additional metadata
1085
+ session_id: Session ID (required for tracking)
1086
+ """
1087
+ import time
1088
+
1089
+ # If no session_id provided, we can't track this update properly
1090
+ if not session_id:
1091
+ import warnings
1092
+ warnings.warn("update_trace called without session_id - update will not be tracked")
1093
+ return
1094
+
1095
+ # Normalize edge_id to hex string
1096
+ if isinstance(edge_id, int):
1097
+ edge_id_hex = hex(edge_id)[2:]
1098
+ else:
1099
+ edge_id_hex = str(edge_id)
1100
+
1101
+ # Calculate end time and duration
1102
+ end_time_us = int(time.time() * 1_000_000)
1103
+ if duration_us:
1104
+ start_time_us = end_time_us - duration_us
1105
+ elif duration_ms:
1106
+ duration_us = duration_ms * 1000
1107
+ start_time_us = end_time_us - duration_us
1108
+ else:
1109
+ # No duration provided, use a small default
1110
+ duration_us = 1000 # 1ms
1111
+ start_time_us = end_time_us - duration_us
1112
+
1113
+ # Create a completion span
1114
+ span = {
1115
+ "span_id": f"{edge_id_hex}_complete",
1116
+ "trace_id": str(session_id),
1117
+ "parent_span_id": edge_id_hex,
1118
+ "name": "RESPONSE", # Mark as response/completion span
1119
+ "start_time": start_time_us,
1120
+ "end_time": end_time_us,
1121
+ "attributes": {
1122
+ "tenant_id": str(self.tenant_id),
1123
+ "project_id": str(self.project_id),
1124
+ "agent_id": str(self.agent_id),
1125
+ "session_id": str(session_id),
1126
+ "span_type": "6", # RESPONSE = 6
1127
+ "token_count": str(token_count) if token_count else "0",
1128
+ "duration_us": str(duration_us),
1129
+ }
1130
+ }
1131
+
1132
+ # Add payload as attributes if provided
1133
+ if payload:
1134
+ for key, value in payload.items():
1135
+ # Convert nested dicts/lists to JSON strings
1136
+ if isinstance(value, (dict, list)):
1137
+ import json
1138
+ value_str = json.dumps(value)
1139
+ else:
1140
+ value_str = str(value)
1141
+ span["attributes"][f"payload.{key}"] = value_str
1142
+
1143
+ try:
1144
+ response = self._client.post(
1145
+ f"{self.url}/api/v1/traces",
1146
+ json={"spans": [span]},
1147
+ )
1148
+ response.raise_for_status()
1149
+ except Exception as e:
1150
+ # Don't fail the caller if the update fails
1151
+ import warnings
1152
+ warnings.warn(f"Failed to update trace: {e}")
1153
+
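A completion sketch; session_id is required or the update is dropped with a warning, as noted above:

    client.update_trace(
        edge_id=info["edge_id"],
        session_id=42,
        token_count=21,
        duration_ms=350,
        payload={"response": "Hi there!"},
    )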
1154
+ def query_traces(
1155
+ self,
1156
+ session_id: int = None,
1157
+ start_time: int = None,
1158
+ end_time: int = None,
1159
+ start_ts: int = None, # Alias for start_time
1160
+ end_ts: int = None, # Alias for end_time
1161
+ limit: int = 100
1162
+ ) -> list:
1163
+ """Query traces (backward compatibility method).
1164
+
1165
+ Args:
1166
+ session_id: Optional session filter
1167
+ start_time: Start timestamp in microseconds
1168
+ end_time: End timestamp in microseconds
1169
+ start_ts: Alias for start_time
1170
+ end_ts: Alias for end_time
1171
+ limit: Max results
1172
+
1173
+ Returns:
1174
+ List of edges
1175
+ """
1176
+ # Support both start_time/end_time and start_ts/end_ts
1177
+ start = start_time or start_ts
1178
+ end = end_time or end_ts
1179
+
1180
+ if session_id is not None:
1181
+ return self.filter_by_session(
1182
+ session_id=session_id,
1183
+ start_timestamp_us=start or 0,
1184
+ end_timestamp_us=end or 0
1185
+ )
1186
+ elif start is not None and end is not None:
1187
+ filter = QueryFilter(tenant_id=self.tenant_id, project_id=self.project_id)
1188
+ response = self.query_temporal_range(start, end, filter)
1189
+ return response.edges[:limit]
1190
+ else:
1191
+ return []
1192
+
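For example, fetching recent edges for one session via the compatibility wrapper:

    recent = client.query_traces(session_id=42)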
1193
+ # Memory API
1194
+
1195
+ def ingest_memory(
1196
+ self,
1197
+ collection: str,
1198
+ content: str,
1199
+ metadata: Optional[Dict[str, str]] = None,
1200
+ ) -> dict:
1201
+ """Ingest content into memory (Online Mode).
1202
+
1203
+ Args:
1204
+ collection: Name of the collection (e.g. 'agent_history')
1205
+ content: The text content to memorize
1206
+ metadata: Optional key-value metadata
1207
+
1208
+ Returns:
1209
+ Dict with 'id', 'status', 'collection'
1210
+
1211
+ Example:
1212
+ >>> client.ingest_memory(
1213
+ ... collection="user_prefs",
1214
+ ... content="User likes dark mode",
1215
+ ... metadata={"source": "chat"}
1216
+ ... )
1217
+ """
1218
+ response = self._client.post(
1219
+ f"{self.url}/api/v1/memory/ingest",
1220
+ json={
1221
+ "collection": collection,
1222
+ "content": content,
1223
+ "metadata": metadata or {},
1224
+ },
1225
+ )
1226
+ response.raise_for_status()
1227
+ return response.json()
1228
+
1229
+ def retrieve_memory(
1230
+ self,
1231
+ collection: str,
1232
+ query: str,
1233
+ k: int = 5,
1234
+ ) -> dict:
1235
+ """Retrieve similar memories (Online Mode).
1236
+
1237
+ Args:
1238
+ collection: Name of the collection
1239
+ query: The search query
1240
+ k: Number of results to return (default: 5)
1241
+
1242
+ Returns:
1243
+ Dict with 'results' list, 'query', 'collection'
1244
+
1245
+ Example:
1246
+ >>> results = client.retrieve_memory("user_prefs", "What mode?")
1247
+ >>> for mem in results['results']:
1248
+ ... print(mem['content'])
1249
+ """
1250
+ response = self._client.post(
1251
+ f"{self.url}/api/v1/memory/retrieve",
1252
+ json={
1253
+ "collection": collection,
1254
+ "query": query,
1255
+ "k": k,
1256
+ },
1257
+ )
1258
+ response.raise_for_status()
1259
+ return response.json()
1260
+
1261
+ def list_collections(self) -> dict:
1262
+ """List all memory collections.
1263
+
1264
+ Returns:
1265
+ Dict with list of collection names
1266
+ """
1267
+ response = self._client.get(f"{self.url}/api/v1/memory/collections")
1268
+ response.raise_for_status()
1269
+ return response.json()
1270
+
1271
+ def get_memory_info(self) -> dict:
1272
+ """Get information about the memory system.
1273
+
1274
+ Returns:
1275
+ System status and configuration info
1276
+ """
1277
+ response = self._client.get(f"{self.url}/api/v1/memory/info")
1278
+ response.raise_for_status()
1279
+ return response.json()
1280
+
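A round-trip sketch of the memory endpoints above, using the collection name from the docstring examples:

    client.ingest_memory(
        collection="user_prefs",
        content="User likes dark mode",
        metadata={"source": "chat"},
    )
    hits = client.retrieve_memory("user_prefs", query="display preferences", k=3)
    for mem in hits["results"]:
        print(mem["content"])
    print(client.list_collections())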
1281
+
1282
+
1283
+ class AsyncAgentreplayClient:
1284
+ """Async version of AgentreplayClient for high-performance applications.
1285
+
1286
+ Provides the same API as AgentreplayClient but with async/await support.
1287
+ """
1288
+
1289
+ def __init__(
1290
+ self,
1291
+ url: str,
1292
+ tenant_id: int,
1293
+ project_id: int = 0,
1294
+ agent_id: int = 1,
1295
+ timeout: float = 30.0,
1296
+ ):
1297
+ """Initialize async Agentreplay client."""
1298
+ self.url = url.rstrip("/")
1299
+ self.tenant_id = tenant_id
1300
+ self.project_id = project_id
1301
+ self.agent_id = agent_id
1302
+ self.timeout = timeout
1303
+ # Configure connection pooling so requests reuse TCP connections.
1304
+ # Without pooling, every request pays full TCP setup cost (SYN/ACK overhead),
1305
+ # which severely limits throughput for high-volume workloads.
1306
+ self._client = httpx.AsyncClient(
1307
+ timeout=timeout,
1308
+ limits=httpx.Limits(
1309
+ max_connections=100, # Total concurrent connections
1310
+ max_keepalive_connections=50, # Pooled idle connections
1311
+ keepalive_expiry=30.0, # Keep connections alive for 30s
1312
+ ),
1313
+ http2=False, # HTTP/2 not needed for this use case, stick with HTTP/1.1
1314
+ )
1315
+ self._session_counter = 0
1316
+
1317
+ async def __aenter__(self) -> "AsyncAgentreplayClient":
1318
+ """Async context manager entry."""
1319
+ return self
1320
+
1321
+ async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
1322
+ """Async context manager exit."""
1323
+ await self.close()
1324
+
1325
+ async def close(self) -> None:
1326
+ """Close the HTTP client."""
1327
+ await self._client.aclose()
1328
+
1329
+ async def insert(self, edge: AgentFlowEdge) -> AgentFlowEdge:
1330
+ """Insert a single edge asynchronously."""
1331
+ response = await self._client.post(
1332
+ f"{self.url}/api/v1/edges",
1333
+ json=edge.model_dump(),
1334
+ )
1335
+ response.raise_for_status()
1336
+ return AgentFlowEdge(**response.json())
1337
+
1338
+ async def insert_batch(self, edges: List[AgentFlowEdge]) -> List[AgentFlowEdge]:
1339
+ """Insert multiple edges in a batch asynchronously."""
1340
+ response = await self._client.post(
1341
+ f"{self.url}/api/v1/edges/batch",
1342
+ json=[e.model_dump() for e in edges],
1343
+ )
1344
+ response.raise_for_status()
1345
+ return [AgentFlowEdge(**e) for e in response.json()]
1346
+
1347
+ async def submit_feedback(self, trace_id: str, feedback: int) -> dict:
1348
+ """Submit user feedback for a trace asynchronously.
1349
+
1350
+ Args:
1351
+ trace_id: Trace/edge identifier (hex string)
1352
+ feedback: -1 (thumbs down), 0 (neutral), 1 (thumbs up)
1353
+
1354
+ Returns:
1355
+ Response dict with status
1356
+ """
1357
+ if feedback not in {-1, 0, 1}:
1358
+ raise ValueError(f"Feedback must be -1, 0, or 1, got {feedback}")
1359
+
1360
+ response = await self._client.post(
1361
+ f"{self.url}/api/v1/traces/{trace_id}/feedback",
1362
+ json={"feedback": feedback},
1363
+ )
1364
+ response.raise_for_status()
1365
+ return response.json()
1366
+
1367
+ async def add_to_dataset(
1368
+ self, trace_id: str, dataset_name: str, input_data: Optional[dict] = None, output_data: Optional[dict] = None
1369
+ ) -> dict:
1370
+ """Add a trace to an evaluation dataset asynchronously.
1371
+
1372
+ Args:
1373
+ trace_id: Trace/edge identifier (hex string)
1374
+ dataset_name: Name of dataset to add to
1375
+ input_data: Optional input data to store with trace
1376
+ output_data: Optional output data to store with trace
1377
+
1378
+ Returns:
1379
+ Response dict with status
1380
+ """
1381
+ payload = {"trace_id": trace_id}
1382
+ if input_data:
1383
+ payload["input"] = input_data
1384
+ if output_data:
1385
+ payload["output"] = output_data
1386
+
1387
+ response = await self._client.post(
1388
+ f"{self.url}/api/v1/datasets/{dataset_name}/add",
1389
+ json=payload,
1390
+ )
1391
+ response.raise_for_status()
1392
+ return response.json()
1393
+
1394
+ async def get(self, edge_id: int) -> Optional[AgentFlowEdge]:
1395
+ """Get an edge by ID asynchronously.
1396
+
1397
+ Args:
1398
+ edge_id: Edge identifier
1399
+
1400
+ Returns:
1401
+ Edge if found, None otherwise
1402
+ """
1403
+ response = await self._client.get(f"{self.url}/api/v1/edges/{edge_id}")
1404
+ if response.status_code == 404:
1405
+ return None
1406
+ response.raise_for_status()
1407
+ return AgentFlowEdge(**response.json())
1408
+
1409
+ async def query_temporal_range(
1410
+ self,
1411
+ start_timestamp_us: int,
1412
+ end_timestamp_us: int,
1413
+ filter: Optional[QueryFilter] = None,
1414
+ ) -> QueryResponse:
1415
+ """Query edges in a temporal range asynchronously.
1416
+
1417
+ Args:
1418
+ start_timestamp_us: Start timestamp (microseconds since epoch)
1419
+ end_timestamp_us: End timestamp (microseconds since epoch)
1420
+ filter: Optional query filters
1421
+
1422
+ Returns:
1423
+ Query response with matching edges
1424
+ """
1425
+ params = {
1426
+ "start": start_timestamp_us,
1427
+ "end": end_timestamp_us,
1428
+ }
1429
+ if filter:
1430
+ params.update(filter.model_dump(exclude_none=True))
1431
+
1432
+ response = await self._client.get(
1433
+ f"{self.url}/api/v1/edges/query",
1434
+ params=params,
1435
+ )
1436
+ response.raise_for_status()
1437
+ return QueryResponse(**response.json())
1438
+
1439
+ async def get_children(self, edge_id: int) -> List[AgentFlowEdge]:
1440
+ """Get direct children of an edge asynchronously.
1441
+
1442
+ Args:
1443
+ edge_id: Parent edge identifier
1444
+
1445
+ Returns:
1446
+ List of child edges
1447
+ """
1448
+ response = await self._client.get(f"{self.url}/api/v1/edges/{edge_id}/children")
1449
+ response.raise_for_status()
1450
+ return [AgentFlowEdge(**e) for e in response.json()]
1451
+
1452
+ async def get_ancestors(self, edge_id: int) -> List[AgentFlowEdge]:
1453
+ """Get all ancestors of an edge (path to root) asynchronously.
1454
+
1455
+ Args:
1456
+ edge_id: Edge identifier
1457
+
1458
+ Returns:
1459
+ List of ancestor edges (root to immediate parent)
1460
+ """
1461
+ response = await self._client.get(f"{self.url}/api/v1/edges/{edge_id}/ancestors")
1462
+ response.raise_for_status()
1463
+ return [AgentFlowEdge(**e) for e in response.json()]
1464
+
1465
+ async def stream_chat(
1466
+ self,
1467
+ provider: str,
1468
+ messages: List[dict],
1469
+ model: Optional[str] = None,
1470
+ on_token: Optional[Callable[[str], None]] = None
1471
+ ) -> AsyncIterator[str]:
1472
+ """Stream chat completion from LLM provider.
1473
+
1474
+ Args:
1475
+ provider: Provider ID (e.g., 'openai', 'anthropic')
1476
+ messages: List of message dicts with 'role' and 'content'
1477
+ model: Optional model name override
1478
+ on_token: Optional callback for each token
1479
+
1480
+ Yields:
1481
+ Token strings as they arrive from the LLM
1482
+
1483
+ Example:
1484
+ >>> async for token in client.stream_chat(
1485
+ ... provider="openai",
1486
+ ... messages=[{"role": "user", "content": "Hello!"}]
1487
+ ... ):
1488
+ ... print(token, end="", flush=True)
1489
+ """
1490
+ payload = {
1491
+ "provider": provider,
1492
+ "messages": messages,
1493
+ }
1494
+ if model:
1495
+ payload["model"] = model
1496
+
1497
+ async with self._client.stream(
1498
+ "POST",
1499
+ f"{self.url}/api/v1/chat/stream",
1500
+ json=payload,
1501
+ ) as response:
1502
+ response.raise_for_status()
1503
+ async for line in response.aiter_lines():
1504
+ if line.startswith("data: "):
1505
+ token = line[6:] # Strip "data: " prefix
1506
+ if on_token:
1507
+ on_token(token)
1508
+ yield token
1509
+
1510
+ async def chat_completion(
1511
+ self,
1512
+ provider: str,
1513
+ messages: List[dict],
1514
+ model: Optional[str] = None,
1515
+ ) -> dict:
1516
+ """Get complete chat response from LLM provider.
1517
+
1518
+ Args:
1519
+ provider: Provider ID (e.g., 'openai', 'anthropic')
1520
+ messages: List of message dicts with 'role' and 'content'
1521
+ model: Optional model name override
1522
+
1523
+ Returns:
1524
+ Dict with 'content', 'provider', 'model', 'tokens_used', 'duration_ms'
1525
+
1526
+ Example:
1527
+ >>> response = await client.chat_completion(
1528
+ ... provider="openai",
1529
+ ... messages=[{"role": "user", "content": "Hello!"}]
1530
+ ... )
1531
+ >>> print(response["content"])
1532
+ """
1533
+ payload = {
1534
+ "provider": provider,
1535
+ "messages": messages,
1536
+ }
1537
+ if model:
1538
+ payload["model"] = model
1539
+
1540
+ response = await self._client.post(
1541
+ f"{self.url}/api/v1/chat/completions",
1542
+ json=payload,
1543
+ )
1544
+ response.raise_for_status()
1545
+ return response.json()
1546
+
1547
+ async def list_llm_models(self) -> dict:
1548
+ """List available LLM providers and models.
1549
+
1550
+ Returns:
1551
+ Dict with 'providers' list containing provider info
1552
+
1553
+ Example:
1554
+ >>> models = await client.list_llm_models()
1555
+ >>> for provider in models["providers"]:
1556
+ ... print(f"{provider['name']}: {provider['models']}")
1557
+ """
1558
+ response = await self._client.get(f"{self.url}/api/v1/chat/models")
1559
+ response.raise_for_status()
1560
+ return response.json()
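
A minimal async sketch combining the client lifecycle and the streaming endpoint above, assuming an 'openai' provider is configured server-side:

    import asyncio
    from agentreplay.client import AsyncAgentreplayClient

    async def main():
        async with AsyncAgentreplayClient(url="http://localhost:8080", tenant_id=1) as client:
            async for token in client.stream_chat(
                provider="openai",
                messages=[{"role": "user", "content": "Hello!"}],
            ):
                print(token, end="", flush=True)

    asyncio.run(main())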