cite-agent 1.3.9-py3-none-any.whl → 1.4.3-py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in that registry.
- cite_agent/__init__.py +13 -13
- cite_agent/__version__.py +1 -1
- cite_agent/action_first_mode.py +150 -0
- cite_agent/adaptive_providers.py +413 -0
- cite_agent/archive_api_client.py +186 -0
- cite_agent/auth.py +0 -1
- cite_agent/auto_expander.py +70 -0
- cite_agent/cache.py +379 -0
- cite_agent/circuit_breaker.py +370 -0
- cite_agent/citation_network.py +377 -0
- cite_agent/cli.py +8 -16
- cite_agent/cli_conversational.py +113 -3
- cite_agent/confidence_calibration.py +381 -0
- cite_agent/deduplication.py +325 -0
- cite_agent/enhanced_ai_agent.py +689 -371
- cite_agent/error_handler.py +228 -0
- cite_agent/execution_safety.py +329 -0
- cite_agent/full_paper_reader.py +239 -0
- cite_agent/observability.py +398 -0
- cite_agent/offline_mode.py +348 -0
- cite_agent/paper_comparator.py +368 -0
- cite_agent/paper_summarizer.py +420 -0
- cite_agent/pdf_extractor.py +350 -0
- cite_agent/proactive_boundaries.py +266 -0
- cite_agent/quality_gate.py +442 -0
- cite_agent/request_queue.py +390 -0
- cite_agent/response_enhancer.py +257 -0
- cite_agent/response_formatter.py +458 -0
- cite_agent/response_pipeline.py +295 -0
- cite_agent/response_style_enhancer.py +259 -0
- cite_agent/self_healing.py +418 -0
- cite_agent/similarity_finder.py +524 -0
- cite_agent/streaming_ui.py +13 -9
- cite_agent/thinking_blocks.py +308 -0
- cite_agent/tool_orchestrator.py +416 -0
- cite_agent/trend_analyzer.py +540 -0
- cite_agent/unpaywall_client.py +226 -0
- {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/METADATA +15 -1
- cite_agent-1.4.3.dist-info/RECORD +62 -0
- cite_agent-1.3.9.dist-info/RECORD +0 -32
- {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/WHEEL +0 -0
- {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/entry_points.txt +0 -0
- {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/licenses/LICENSE +0 -0
- {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/top_level.txt +0 -0
cite_agent/full_paper_reader.py
@@ -0,0 +1,239 @@
+#!/usr/bin/env python3
+"""
+Full Paper Reader Integration
+Combines search + PDF extraction + summarization into one killer feature
+"""
+
+import logging
+from typing import Dict, List, Any, Optional
+
+logger = logging.getLogger(__name__)
+
+
+async def read_full_papers_workflow(
+    agent,  # EnhancedNocturnalAgent instance
+    query: str,
+    limit: int = 5,
+    summarize: bool = True
+) -> Dict[str, Any]:
+    """
+    🔥 KILLER FEATURE: Search, download, and READ full academic papers
+
+    This is the game-changer - skip reading papers yourself!
+
+    Args:
+        agent: Agent instance with search capabilities
+        query: Search query
+        limit: Number of papers to find and read
+        summarize: If True, summarize each paper (methodology, findings, etc.)
+
+    Returns:
+        {
+            'query': str,
+            'papers_found': int,
+            'papers_read': int,
+            'papers': [
+                {
+                    'title': str,
+                    'doi': str,
+                    'pdf_url': str,
+                    'summary': {
+                        'research_question': str,
+                        'methodology': str,
+                        'key_findings': [str, ...],
+                        'limitations': str,
+                        'implications': str
+                    },
+                    'full_text_available': bool,
+                    'word_count': int
+                },
+                ...
+            ],
+            'synthesis': str  # Overall summary across all papers
+        }
+
+    Example:
+        >>> result = await read_full_papers_workflow(agent, "ESG investing performance", limit=3)
+        >>> print(result['synthesis'])
+        "Based on 3 papers:
+         - All 3 found positive ESG-performance correlation
+         - Effect sizes range from +1.2% to +4.1% annually
+         - Methodological gap: most studies focus on large-cap stocks"
+    """
+    logger.info(f"🔥 FULL PAPER READING: {query} (limit={limit})")
+
+    # Import the services
+    try:
+        from .pdf_extractor import pdf_extractor
+        from .unpaywall_client import unpaywall
+        from .paper_summarizer import PaperSummarizer, PaperSummary
+    except ImportError as e:
+        logger.error(f"PDF reading libraries not installed: {e}")
+        return {
+            'error': 'PDF reading not available. Install: pip install pypdf2 pdfplumber pymupdf',
+            'papers_found': 0,
+            'papers_read': 0
+        }
+
+    # Step 1: Search for papers
+    search_results = await agent.search_academic_papers(query, limit=limit)
+    papers_data = search_results.get('results', [])
+
+    if not papers_data:
+        return {
+            'query': query,
+            'papers_found': 0,
+            'papers_read': 0,
+            'papers': [],
+            'message': 'No papers found'
+        }
+
+    logger.info(f"Found {len(papers_data)} papers, attempting to read full text...")
+
+    # Step 2: For each paper, try to get PDF and extract
+    papers_output = []
+    successfully_read = 0
+
+    for paper in papers_data:
+        title = paper.get('title', 'Unknown')
+        doi = paper.get('doi')
+        pdf_url = paper.get('pdf_url')
+        authors = paper.get('authors', [])
+        year = paper.get('year')
+
+        logger.info(f"Processing: {title}")
+
+        paper_result = {
+            'title': title,
+            'doi': doi,
+            'authors': [a.get('name') if isinstance(a, dict) else a for a in authors],
+            'year': year,
+            'pdf_url': pdf_url,
+            'full_text_available': False
+        }
+
+        # Try to get PDF URL if not provided
+        if not pdf_url and doi:
+            logger.info(f"  No PDF URL, checking Unpaywall for DOI: {doi}")
+            pdf_url = await unpaywall.get_pdf_url(doi)
+            if pdf_url:
+                logger.info(f"  ✅ Found open access PDF via Unpaywall")
+                paper_result['pdf_url'] = pdf_url
+                paper_result['source'] = 'unpaywall'
+
+        # Try to extract full text
+        if pdf_url:
+            logger.info(f"  Extracting PDF from: {pdf_url[:80]}...")
+            try:
+                extracted = await pdf_extractor.extract_from_url(pdf_url)
+
+                if extracted.extraction_quality in ('high', 'medium'):
+                    logger.info(f"  ✅ Successfully extracted {extracted.word_count} words")
+                    successfully_read += 1
+                    paper_result['full_text_available'] = True
+                    paper_result['word_count'] = extracted.word_count
+                    paper_result['page_count'] = extracted.page_count
+                    paper_result['extraction_quality'] = extracted.extraction_quality
+
+                    # Summarize if requested
+                    if summarize and agent.client:
+                        logger.info(f"  Summarizing paper...")
+                        summarizer = PaperSummarizer(agent.client)
+                        summary = await summarizer.summarize_paper(
+                            extracted,
+                            doi=doi,
+                            authors=paper_result['authors'],
+                            year=year
+                        )
+                        paper_result['summary'] = {
+                            'research_question': summary.research_question,
+                            'methodology': summary.methodology,
+                            'key_findings': summary.key_findings,
+                            'limitations': summary.limitations,
+                            'implications': summary.implications,
+                            'confidence': summary.confidence
+                        }
+                    else:
+                        # Basic extraction without LLM
+                        paper_result['sections'] = {
+                            'abstract': extracted.abstract,
+                            'introduction': extracted.introduction[:500] if extracted.introduction else None,
+                            'methodology': extracted.methodology[:500] if extracted.methodology else None,
+                            'results': extracted.results[:500] if extracted.results else None,
+                            'conclusion': extracted.conclusion[:500] if extracted.conclusion else None
+                        }
+                else:
+                    logger.warning(f"  ⚠️ Low quality extraction: {extracted.error_message}")
+                    paper_result['extraction_error'] = extracted.error_message
+
+            except Exception as e:
+                logger.error(f"  ❌ PDF extraction failed: {e}")
+                paper_result['extraction_error'] = str(e)
+        else:
+            logger.info(f"  ⚠️ No PDF URL available (paywalled)")
+            paper_result['note'] = 'Paywalled - no open access version found'
+
+        papers_output.append(paper_result)
+
+    # Step 3: Generate synthesis across all successfully read papers
+    synthesis = _synthesize_multiple_papers(papers_output, query)
+
+    result = {
+        'query': query,
+        'papers_found': len(papers_data),
+        'papers_read': successfully_read,
+        'papers': papers_output,
+        'synthesis': synthesis,
+        'success_rate': f"{successfully_read}/{len(papers_data)} ({100*successfully_read//len(papers_data) if papers_data else 0}%)"
+    }
+
+    logger.info(f"✅ COMPLETE: Read {successfully_read}/{len(papers_data)} papers successfully")
+
+    return result
+
+
+def _synthesize_multiple_papers(papers: List[Dict[str, Any]], query: str) -> str:
+    """Generate synthesis across multiple papers"""
+    readable_papers = [p for p in papers if p.get('full_text_available')]
+
+    if not readable_papers:
+        return "No papers could be read (all paywalled or extraction failed)"
+
+    synthesis_parts = []
+    synthesis_parts.append(f"Based on {len(readable_papers)} papers analyzed:\n")
+
+    # Collect all findings
+    all_findings = []
+    all_methodologies = []
+
+    for paper in readable_papers:
+        summary = paper.get('summary', {})
+        if summary:
+            findings = summary.get('key_findings', [])
+            if findings:
+                all_findings.extend(findings)
+
+            methodology = summary.get('methodology')
+            if methodology:
+                all_methodologies.append(methodology)
+
+    # Methodology overview
+    if all_methodologies:
+        synthesis_parts.append(f"METHODOLOGIES USED:")
+        for i, method in enumerate(all_methodologies[:3], 1):
+            synthesis_parts.append(f"  {i}. {method}")
+        synthesis_parts.append("")
+
+    # Key findings
+    if all_findings:
+        synthesis_parts.append(f"KEY FINDINGS ACROSS PAPERS:")
+        for i, finding in enumerate(all_findings[:5], 1):
+            synthesis_parts.append(f"  • {finding}")
+        synthesis_parts.append("")
+
+    # Success rate
+    total = len(papers)
+    success = len(readable_papers)
+    synthesis_parts.append(f"Coverage: {success}/{total} papers successfully analyzed")
+
+    return "\n".join(synthesis_parts)
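For orientation, a minimal caller for the new workflow, pieced together from the docstring above; the import paths and the way EnhancedNocturnalAgent is constructed are assumptions for illustration, not taken from this diff:

# Sketch only: assumes EnhancedNocturnalAgent is importable and constructible as shown,
# and that it exposes the search_academic_papers()/client attributes the workflow expects.
import asyncio

from cite_agent.enhanced_ai_agent import EnhancedNocturnalAgent  # assumed import path
from cite_agent.full_paper_reader import read_full_papers_workflow


async def main() -> None:
    agent = EnhancedNocturnalAgent()  # construction/initialization details are an assumption
    result = await read_full_papers_workflow(agent, "ESG investing performance", limit=3)
    print(f"Read {result['papers_read']}/{result['papers_found']} papers")
    print(result.get("synthesis", ""))


if __name__ == "__main__":
    asyncio.run(main())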
cite_agent/observability.py
@@ -0,0 +1,398 @@
+"""
+Comprehensive Observability System
+Metrics collection, tracing, and analytics for decision making
+"""
+
+import json
+import time
+from dataclasses import dataclass, field, asdict
+from enum import Enum
+from typing import Dict, List, Optional, Any
+from datetime import datetime, timedelta
+from pathlib import Path
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class EventType(Enum):
+    """Types of observable events"""
+    REQUEST_QUEUED = "request_queued"
+    REQUEST_STARTED = "request_started"
+    REQUEST_COMPLETED = "request_completed"
+    REQUEST_FAILED = "request_failed"
+    REQUEST_TIMEOUT = "request_timeout"
+
+    API_CALL = "api_call"
+    API_CALL_SUCCESS = "api_call_success"
+    API_CALL_FAILURE = "api_call_failure"
+
+    CIRCUIT_BREAKER_STATE_CHANGE = "circuit_breaker_state_change"
+    RATE_LIMIT_HIT = "rate_limit_hit"
+    QUEUE_FULL = "queue_full"
+
+    PROVIDER_SWITCH = "provider_switch"
+    FALLBACK_ACTIVATED = "fallback_activated"
+    DEGRADATION_MODE = "degradation_mode"
+
+
+@dataclass
+class ObservableEvent:
+    """A single observable event"""
+    event_type: EventType
+    timestamp: datetime = field(default_factory=datetime.now)
+    user_id: Optional[str] = None
+    request_id: Optional[str] = None
+    duration_ms: Optional[float] = None  # For timing events
+    status: Optional[str] = None  # success, failure, etc.
+    provider: Optional[str] = None  # api provider used
+    error_message: Optional[str] = None
+    metadata: Dict[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> Dict:
+        """Convert to dictionary for JSON serialization"""
+        data = asdict(self)
+        data['event_type'] = self.event_type.value
+        data['timestamp'] = self.timestamp.isoformat()
+        return data
+
+
+class Histogram:
+    """Simple histogram for tracking value distributions"""
+
+    def __init__(self, name: str, buckets: List[float] = None):
+        self.name = name
+        self.buckets = buckets or [0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0]
+        self.values: Dict[float, int] = {b: 0 for b in self.buckets}
+        self.values['inf'] = 0
+        self.all_values = []
+
+    def observe(self, value: float):
+        """Record a value"""
+        self.all_values.append(value)
+        for bucket in self.buckets:
+            if value <= bucket:
+                self.values[bucket] += 1
+                return
+        self.values['inf'] += 1
+
+    def get_percentile(self, p: float) -> float:
+        """Get percentile (0.0-1.0)"""
+        if not self.all_values:
+            return 0.0
+        sorted_vals = sorted(self.all_values)
+        idx = int(len(sorted_vals) * p)
+        return sorted_vals[min(idx, len(sorted_vals) - 1)]
+
+    def get_stats(self) -> Dict[str, float]:
+        """Get distribution statistics"""
+        if not self.all_values:
+            return {"count": 0}
+
+        sorted_vals = sorted(self.all_values)
+        return {
+            "count": len(sorted_vals),
+            "min": sorted_vals[0],
+            "max": sorted_vals[-1],
+            "mean": sum(sorted_vals) / len(sorted_vals),
+            "p50": self.get_percentile(0.5),
+            "p95": self.get_percentile(0.95),
+            "p99": self.get_percentile(0.99),
+        }
+
+
+@dataclass
+class ProviderMetrics:
+    """Metrics for a specific API provider"""
+    provider_name: str
+    total_calls: int = 0
+    successful_calls: int = 0
+    failed_calls: int = 0
+    total_latency_ms: float = 0.0
+    latency_histogram: Histogram = field(default_factory=lambda: Histogram("latency"))
+    error_counts: Dict[str, int] = field(default_factory=dict)
+    last_used: Optional[datetime] = None
+
+    def record_success(self, latency_ms: float):
+        """Record successful API call"""
+        self.total_calls += 1
+        self.successful_calls += 1
+        self.total_latency_ms += latency_ms
+        self.latency_histogram.observe(latency_ms / 1000.0)  # Convert to seconds
+        self.last_used = datetime.now()
+
+    def record_failure(self, error_type: str, latency_ms: float = 0):
+        """Record failed API call"""
+        self.total_calls += 1
+        self.failed_calls += 1
+        self.total_latency_ms += latency_ms
+        self.error_counts[error_type] = self.error_counts.get(error_type, 0) + 1
+        self.last_used = datetime.now()
+
+    def get_success_rate(self) -> float:
+        """Get success rate 0.0-1.0"""
+        if self.total_calls == 0:
+            return 0.0
+        return self.successful_calls / self.total_calls
+
+    def get_avg_latency_ms(self) -> float:
+        """Get average latency in ms"""
+        if self.total_calls == 0:
+            return 0.0
+        return self.total_latency_ms / self.total_calls
+
+
+@dataclass
+class UserBehaviorMetrics:
+    """Metrics about a specific user's behavior"""
+    user_id: str
+    total_requests: int = 0
+    total_api_calls: int = 0
+    total_failures: int = 0
+    most_common_provider: Optional[str] = None
+    avg_requests_per_hour: float = 0.0
+    first_seen: datetime = field(default_factory=datetime.now)
+    last_seen: Optional[datetime] = None
+    preferred_providers: Dict[str, int] = field(default_factory=dict)
+
+
+class ObservabilitySystem:
+    """
+    Central observability system collecting metrics from all components
+
+    Tracks:
+    - Request latencies (p50, p95, p99)
+    - Provider performance (success rate, latency, errors)
+    - User behavior patterns
+    - Error types and frequencies
+    - Circuit breaker state changes
+    - Rate limit hits and queue fills
+    """
+
+    def __init__(self, storage_dir: Optional[Path] = None):
+        self.storage_dir = storage_dir or Path.home() / ".nocturnal_archive" / "observability"
+        self.storage_dir.mkdir(parents=True, exist_ok=True)
+
+        # Event log
+        self.events: List[ObservableEvent] = []
+        self.event_index = 0
+
+        # Metrics
+        self.provider_metrics: Dict[str, ProviderMetrics] = {}
+        self.user_metrics: Dict[str, UserBehaviorMetrics] = {}
+        self.request_latencies = Histogram("request_latency")
+
+        # Counters
+        self.counters: Dict[str, int] = {
+            "total_requests": 0,
+            "total_failures": 0,
+            "circuit_breaks": 0,
+            "rate_limits": 0,
+            "fallbacks": 0,
+        }
+
+    def record_event(self, event: ObservableEvent):
+        """Record an observable event"""
+        self.events.append(event)
+        self.event_index += 1
+
+        # Log to file periodically
+        if len(self.events) % 100 == 0:
+            self._flush_events()
+
+        # Update metrics based on event type
+        if event.event_type == EventType.REQUEST_COMPLETED:
+            self.counters["total_requests"] += 1
+            if event.duration_ms:
+                self.request_latencies.observe(event.duration_ms / 1000.0)
+
+        elif event.event_type == EventType.REQUEST_FAILED:
+            self.counters["total_failures"] += 1
+
+        elif event.event_type == EventType.CIRCUIT_BREAKER_STATE_CHANGE:
+            self.counters["circuit_breaks"] += 1
+
+        elif event.event_type == EventType.RATE_LIMIT_HIT:
+            self.counters["rate_limits"] += 1
+
+        elif event.event_type == EventType.FALLBACK_ACTIVATED:
+            self.counters["fallbacks"] += 1
+
+        # Update user metrics
+        if event.user_id:
+            self._update_user_metrics(event)
+
+        # Update provider metrics
+        if event.provider:
+            self._update_provider_metrics(event)
+
+    def _update_user_metrics(self, event: ObservableEvent):
+        """Update metrics for a specific user"""
+        user_id = event.user_id
+        if user_id not in self.user_metrics:
+            self.user_metrics[user_id] = UserBehaviorMetrics(user_id=user_id)
+
+        metrics = self.user_metrics[user_id]
+
+        if event.event_type == EventType.REQUEST_COMPLETED:
+            metrics.total_requests += 1
+            metrics.last_seen = event.timestamp
+
+            if event.provider:
+                metrics.total_api_calls += 1
+                metrics.preferred_providers[event.provider] = \
+                    metrics.preferred_providers.get(event.provider, 0) + 1
+                metrics.most_common_provider = max(
+                    metrics.preferred_providers,
+                    key=metrics.preferred_providers.get
+                )
+
+        if event.event_type == EventType.REQUEST_FAILED:
+            metrics.total_failures += 1
+
+    def _update_provider_metrics(self, event: ObservableEvent):
+        """Update metrics for a specific provider"""
+        provider = event.provider
+        if provider not in self.provider_metrics:
+            self.provider_metrics[provider] = ProviderMetrics(provider_name=provider)
+
+        metrics = self.provider_metrics[provider]
+
+        if event.event_type == EventType.API_CALL_SUCCESS:
+            metrics.record_success(event.duration_ms or 0)
+        elif event.event_type == EventType.API_CALL_FAILURE:
+            metrics.record_failure(event.error_message or "unknown", event.duration_ms or 0)
+
+    def record_api_call(
+        self,
+        provider: str,
+        user_id: Optional[str] = None,
+        request_id: Optional[str] = None,
+        duration_ms: float = 0,
+        success: bool = True,
+        error: Optional[str] = None
+    ):
+        """Convenience method to record an API call"""
+        event_type = EventType.API_CALL_SUCCESS if success else EventType.API_CALL_FAILURE
+        event = ObservableEvent(
+            event_type=event_type,
+            user_id=user_id,
+            request_id=request_id,
+            provider=provider,
+            duration_ms=duration_ms,
+            status="success" if success else "failure",
+            error_message=error
+        )
+        self.record_event(event)
+
+    def _flush_events(self):
+        """Write events to disk"""
+        try:
+            filename = self.storage_dir / f"events_{datetime.now().strftime('%Y%m%d_%H%M%S')}.jsonl"
+            with open(filename, 'a') as f:
+                for event in self.events[-100:]:
+                    f.write(json.dumps(event.to_dict()) + '\n')
+        except Exception as e:
+            logger.error(f"Failed to flush events: {e}")
+
+    def get_provider_rankings(self) -> List[tuple]:
+        """Rank providers by performance"""
+        rankings = []
+        for provider_name, metrics in self.provider_metrics.items():
+            score = (
+                metrics.get_success_rate() * 100 -  # Success rate (0-100)
+                (metrics.get_avg_latency_ms() / 1000)  # Latency penalty
+            )
+            rankings.append((provider_name, score, metrics))
+
+        return sorted(rankings, key=lambda x: x[1], reverse=True)
+
+    def get_best_provider(self, exclude: Optional[List[str]] = None) -> Optional[str]:
+        """Get highest-performing provider"""
+        rankings = self.get_provider_rankings()
+        exclude = exclude or []
+
+        for provider_name, score, metrics in rankings:
+            if provider_name not in exclude and metrics.total_calls > 0:
+                return provider_name
+
+        return None
+
+    def get_metrics_summary(self) -> Dict[str, Any]:
+        """Get comprehensive metrics summary"""
+        request_stats = self.request_latencies.get_stats()
+
+        return {
+            "timestamp": datetime.now().isoformat(),
+            "requests": {
+                "total": self.counters["total_requests"],
+                "failures": self.counters["total_failures"],
+                "failure_rate": (
+                    self.counters["total_failures"] / max(1, self.counters["total_requests"])
+                ),
+                "latency": request_stats,
+            },
+            "providers": {
+                name: {
+                    "total_calls": m.total_calls,
+                    "success_rate": m.get_success_rate(),
+                    "avg_latency_ms": m.get_avg_latency_ms(),
+                    "errors": m.error_counts,
+                }
+                for name, m in self.provider_metrics.items()
+            },
+            "users": {
+                "total": len(self.user_metrics),
+                "active": len([u for u in self.user_metrics.values() if u.last_seen]),
+            },
+            "incidents": {
+                "circuit_breaks": self.counters["circuit_breaks"],
+                "rate_limits": self.counters["rate_limits"],
+                "fallbacks": self.counters["fallbacks"],
+            },
+        }
+
+    def get_status_message(self) -> str:
+        """Human-readable observability status"""
+        summary = self.get_metrics_summary()
+        req = summary["requests"]
+
+        lines = [
+            "📊 **Observability Summary**",
+            f"• Total requests: {req['total']} | Failures: {req['failures']} | Rate: {req['failure_rate']:.1%}",
+            f"• Latency: p50={req['latency'].get('p50', 0):.2f}s | p95={req['latency'].get('p95', 0):.2f}s | p99={req['latency'].get('p99', 0):.2f}s",
+            f"• Users: {summary['users']['total']} total | {summary['users']['active']} active",
+            f"• Incidents: {summary['incidents']['circuit_breaks']} circuit breaks | {summary['incidents']['rate_limits']} rate limits | {summary['incidents']['fallbacks']} fallbacks",
+        ]
+
+        if self.provider_metrics:
+            lines.append("\n📈 **Provider Performance**")
+            for provider_name, score, metrics in self.get_provider_rankings()[:3]:
+                lines.append(
+                    f"  • {provider_name}: {metrics.get_success_rate():.1%} success | "
+                    f"{metrics.get_avg_latency_ms():.0f}ms avg latency"
+                )
+
+        return "\n".join(lines)
+
+
+# Global observability instance
+observability = ObservabilitySystem()
+
+
+if __name__ == "__main__":
+    # Test the observability system
+    obs = ObservabilitySystem()
+
+    # Simulate some events
+    for i in range(20):
+        obs.record_api_call(
+            provider="cerebras" if i % 2 == 0 else "groq",
+            user_id=f"user_{i % 3}",
+            request_id=f"req_{i}",
+            duration_ms=100 + i * 10,
+            success=i % 5 != 0  # 20% failure rate
+        )
+
+    print(obs.get_status_message())
+    print("\n" + json.dumps(obs.get_metrics_summary(), indent=2, default=str))
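Beyond the module's own __main__ demo, the intended integration point appears to be the module-level `observability` singleton and record_api_call(). A hedged sketch of how a caller could time a provider call and report it; the provider function and wrapper below are hypothetical, only the record_api_call signature comes from this diff:

# Sketch only: wraps a hypothetical provider call with timing and reports the outcome
# to the observability singleton defined above.
import time

from cite_agent.observability import observability  # assumed import path


def fake_provider_call(prompt: str) -> str:
    # Stand-in for a real provider/LLM call.
    return f"echo: {prompt}"


def call_with_metrics(provider_name: str, prompt: str, user_id: str) -> str:
    start = time.perf_counter()
    try:
        reply = fake_provider_call(prompt)
        observability.record_api_call(
            provider=provider_name,
            user_id=user_id,
            duration_ms=(time.perf_counter() - start) * 1000,
            success=True,
        )
        return reply
    except Exception as exc:
        observability.record_api_call(
            provider=provider_name,
            user_id=user_id,
            duration_ms=(time.perf_counter() - start) * 1000,
            success=False,
            error=type(exc).__name__,
        )
        raise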