plain.observer 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of plain.observer might be problematic. Click here for more details.
- plain/observer/CHANGELOG.md +1 -0
- plain/observer/README.md +3 -0
- plain/observer/__init__.py +0 -0
- plain/observer/admin.py +102 -0
- plain/observer/cli.py +23 -0
- plain/observer/config.py +36 -0
- plain/observer/core.py +63 -0
- plain/observer/default_settings.py +9 -0
- plain/observer/migrations/0001_initial.py +96 -0
- plain/observer/migrations/__init__.py +0 -0
- plain/observer/models.py +355 -0
- plain/observer/otel.py +335 -0
- plain/observer/templates/admin/observer/trace_detail.html +10 -0
- plain/observer/templates/observer/_trace_detail.html +364 -0
- plain/observer/templates/observer/traces.html +288 -0
- plain/observer/templates/toolbar/observer.html +42 -0
- plain/observer/templates/toolbar/observer_button.html +45 -0
- plain/observer/urls.py +10 -0
- plain/observer/views.py +105 -0
- plain_observer-0.0.0.dist-info/METADATA +16 -0
- plain_observer-0.0.0.dist-info/RECORD +23 -0
- plain_observer-0.0.0.dist-info/WHEEL +4 -0
- plain_observer-0.0.0.dist-info/licenses/LICENSE +28 -0
plain/observer/models.py
ADDED
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from datetime import UTC, datetime
|
|
3
|
+
from functools import cached_property
|
|
4
|
+
|
|
5
|
+
import sqlparse
|
|
6
|
+
from opentelemetry.semconv._incubating.attributes import (
|
|
7
|
+
exception_attributes,
|
|
8
|
+
session_attributes,
|
|
9
|
+
user_attributes,
|
|
10
|
+
)
|
|
11
|
+
from opentelemetry.semconv._incubating.attributes.db_attributes import (
|
|
12
|
+
DB_QUERY_PARAMETER_TEMPLATE,
|
|
13
|
+
)
|
|
14
|
+
from opentelemetry.semconv.attributes import db_attributes
|
|
15
|
+
from opentelemetry.trace import format_trace_id
|
|
16
|
+
|
|
17
|
+
from plain import models
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@models.register_model
class Trace(models.Model):
    """A recorded trace: one row per OpenTelemetry trace ID plus its time bounds.

    The spans belonging to a trace are reached through ``self.spans``.
    """

    trace_id = models.CharField(max_length=255)
    start_time = models.DateTimeField()
    end_time = models.DateTimeField()

    root_span_name = models.TextField(default="", required=False)

    # Plain fields
    request_id = models.CharField(max_length=255, default="", required=False)
    session_id = models.CharField(max_length=255, default="", required=False)
    user_id = models.CharField(max_length=255, default="", required=False)

    class Meta:
        ordering = ["-start_time"]
        constraints = [
            models.UniqueConstraint(
                fields=["trace_id"],
                name="observer_unique_trace_id",
            )
        ]

    def __str__(self):
        return self.trace_id

    def duration_ms(self):
        """Return the trace duration in milliseconds.

        Returns:
            The elapsed time between ``start_time`` and ``end_time`` as a float,
            or ``None`` when either bound is missing (an unsaved trace built by
            ``from_opentelemetry_spans`` can have no timestamps at all).
        """
        if self.start_time is None or self.end_time is None:
            return None
        return (self.end_time - self.start_time).total_seconds() * 1000

    def get_trace_summary(self, spans=None):
        """Get a concise summary string for toolbar display.

        Args:
            spans: Optional list of span objects. If not provided, will query from database.

        Returns:
            A string such as ``"3 queries (1 duplicate) • 12.3ms"``, or ``""``
            when the trace has no spans.
        """
        # Get spans from database if not provided
        if spans is None:
            spans = list(self.spans.all())

        if not spans:
            return ""

        # Count database spans, and how often each distinct query text occurs
        query_counts = {}
        db_queries = 0

        for span in spans:
            if span.attributes.get(db_attributes.DB_SYSTEM_NAME):
                db_queries += 1
                if query_text := span.attributes.get(db_attributes.DB_QUERY_TEXT):
                    query_counts[query_text] = query_counts.get(query_text, 0) + 1

        # Every occurrence beyond the first of a given query text is a duplicate
        duplicate_count = sum(count - 1 for count in query_counts.values() if count > 1)

        # Build summary: "n queries (n duplicates) • Xms"
        parts = []

        # Queries count with duplicates
        if db_queries > 0:
            query_part = f"{db_queries} quer{'y' if db_queries == 1 else 'ies'}"
            if duplicate_count > 0:
                query_part += f" ({duplicate_count} duplicate{'' if duplicate_count == 1 else 's'})"
            parts.append(query_part)

        # Duration (omitted when the trace has no usable time bounds)
        if (duration_ms := self.duration_ms()) is not None:
            parts.append(f"{round(duration_ms, 1)}ms")

        return " • ".join(parts)

    @classmethod
    def from_opentelemetry_spans(cls, spans):
        """Create an unsaved Trace instance from a list of OpenTelemetry spans.

        Args:
            spans: Non-empty sequence of OpenTelemetry spans from one trace. The
                trace ID is taken from the first element, so an empty sequence
                raises ``IndexError``.

        Returns:
            A new, unsaved ``Trace`` whose time bounds span all given spans.
        """
        # Get trace information from the first span
        first_span = spans[0]
        trace_id = f"0x{format_trace_id(first_span.get_span_context().trace_id)}"

        # Find trace boundaries and root span info
        earliest_start = None
        latest_end = None
        root_span = None
        request_id = ""
        user_id = ""
        session_id = ""

        for span in spans:
            if not span.parent:
                root_span = span

            if span.start_time and (
                earliest_start is None or span.start_time < earliest_start
            ):
                earliest_start = span.start_time
            # Only update latest_end if the span has actually ended
            if span.end_time and (latest_end is None or span.end_time > latest_end):
                latest_end = span.end_time

            # Attributes may be missing or None on a span; treat both as empty.
            span_attrs = getattr(span, "attributes", None) or {}
            # First non-empty value found across the spans wins for each field.
            request_id = request_id or span_attrs.get("plain.request.id", "")
            user_id = user_id or span_attrs.get(user_attributes.USER_ID, "")
            session_id = session_id or span_attrs.get(session_attributes.SESSION_ID, "")

        # Convert timestamps (span times are divided by 1e9, i.e. nanoseconds)
        start_time = (
            datetime.fromtimestamp(earliest_start / 1_000_000_000, tz=UTC)
            if earliest_start
            else None
        )
        end_time = (
            datetime.fromtimestamp(latest_end / 1_000_000_000, tz=UTC)
            if latest_end
            else None
        )

        # Create trace instance
        # Note: end_time might be None if there are active spans
        # This is OK since this trace is only used for summaries, not persistence
        return cls(
            trace_id=trace_id,
            start_time=start_time,
            end_time=end_time
            or start_time,  # Use start_time as fallback for active traces
            request_id=request_id,
            user_id=user_id,
            session_id=session_id,
            root_span_name=root_span.name if root_span else "",
        )

    @staticmethod
    def _compute_levels(spans):
        """Map each span_id to its nesting depth without relying on list order.

        Roots (no ``parent_id``) are level 0. A span whose parent is not among
        ``spans`` is level 1. A ``parent_id`` cycle is broken by treating the
        span where the cycle is detected as level 1.
        """
        by_id = {span.span_id: span for span in spans}
        levels = {}

        for span in spans:
            # Walk up the ancestry until a resolved ancestor, a root, a missing
            # parent, or a cycle is found; then assign levels back down.
            chain = []
            in_chain = set()
            node = span
            while True:
                if node.span_id in levels:
                    base = levels[node.span_id] + 1
                    break
                if node.span_id in in_chain:
                    base = 1
                    break
                chain.append(node)
                in_chain.add(node.span_id)
                if not node.parent_id:
                    base = 0
                    break
                parent = by_id.get(node.parent_id)
                if parent is None:
                    base = 1
                    break
                node = parent

            for offset, member in enumerate(reversed(chain)):
                levels[member.span_id] = base + offset

        return levels

    def get_annotated_spans(self):
        """Return spans with annotations and nesting information.

        Each returned span gets a ``level`` attribute (nesting depth) and an
        ``annotations`` list; spans whose SQL text occurs more than once in the
        trace receive a "Duplicate query (i of n)" warning annotation.
        """
        spans = list(self.spans.all().order_by("start_time"))

        # Calculate nesting levels. A child can sort ahead of its parent when
        # both share the same start_time, so levels are resolved by walking the
        # parent chain instead of assuming the parent was already processed.
        levels = self._compute_levels(spans)
        for span in spans:
            span.level = levels[span.span_id]

        # First pass: count queries
        query_counts = {}
        for span in spans:
            if sql_query := span.sql_query:
                query_counts[sql_query] = query_counts.get(sql_query, 0) + 1

        # Second pass: add annotations
        query_occurrences = {}
        for span in spans:
            span.annotations = []

            # Check for duplicate queries
            if sql_query := span.sql_query:
                count = query_counts[sql_query]
                if count > 1:
                    occurrence = query_occurrences.get(sql_query, 0) + 1
                    query_occurrences[sql_query] = occurrence

                    span.annotations.append(
                        {
                            "message": f"Duplicate query ({occurrence} of {count})",
                            "severity": "warning",
                        }
                    )

        return spans

    def as_dict(self):
        """Return the trace and the raw ``span_data`` of its spans as a dict."""
        spans = [span.span_data for span in self.spans.all().order_by("start_time")]

        return {
            "trace_id": self.trace_id,
            "start_time": self.start_time.isoformat(),
            "end_time": self.end_time.isoformat(),
            "duration_ms": self.duration_ms(),
            "request_id": self.request_id,
            "user_id": self.user_id,
            "session_id": self.session_id,
            "spans": spans,
        }
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
@models.register_model
class Span(models.Model):
    """A single span of a trace, stored with its full serialized payload.

    A few fields are kept as columns; everything else is read from the
    ``span_data`` JSON through the accessor properties below.
    """

    trace = models.ForeignKey(Trace, on_delete=models.CASCADE, related_name="spans")

    span_id = models.CharField(max_length=255)

    name = models.CharField(max_length=255)
    kind = models.CharField(max_length=50)
    parent_id = models.CharField(max_length=255, default="", required=False)
    start_time = models.DateTimeField()
    end_time = models.DateTimeField()
    status = models.CharField(max_length=50, default="", required=False)
    span_data = models.JSONField(default=dict, required=False)

    class Meta:
        ordering = ["-start_time"]
        constraints = [
            models.UniqueConstraint(
                fields=["trace", "span_id"],
                name="observer_unique_span_id",
            )
        ]
        indexes = [
            models.Index(fields=["trace", "span_id"]),
            models.Index(fields=["trace"]),
            models.Index(fields=["start_time"]),
        ]

    @classmethod
    def from_opentelemetry_span(cls, otel_span, trace):
        """Create an unsaved Span instance from an OpenTelemetry span.

        Args:
            otel_span: OpenTelemetry span; its ``to_json()`` output is parsed
                and stored whole in ``span_data``.
            trace: The ``Trace`` instance this span belongs to.
        """
        span_data = json.loads(otel_span.to_json())

        # Extract status code as string, default to empty string if unset
        status = ""
        if span_data.get("status") and span_data["status"].get("status_code"):
            status = span_data["status"]["status_code"]

        return cls(
            trace=trace,
            span_id=span_data["context"]["span_id"],
            name=span_data["name"],
            # Stored without the "SpanKind." prefix; removeprefix leaves a
            # value that lacks the prefix untouched instead of truncating it.
            kind=span_data["kind"].removeprefix("SpanKind."),
            parent_id=span_data["parent_id"] or "",
            # Timestamps are passed through exactly as serialized by to_json().
            start_time=span_data["start_time"],
            end_time=span_data["end_time"],
            status=status,
            span_data=span_data,
        )

    def __str__(self):
        return self.span_id

    # The accessors below use `or <empty>` so that a key that is present but
    # null in the JSON payload still yields an empty container.

    @property
    def attributes(self):
        """Get attributes from span_data (empty dict when missing or null)."""
        return self.span_data.get("attributes") or {}

    @property
    def events(self):
        """Get events from span_data (empty list when missing or null)."""
        return self.span_data.get("events") or []

    @property
    def links(self):
        """Get links from span_data (empty list when missing or null)."""
        return self.span_data.get("links") or []

    @property
    def resource(self):
        """Get resource from span_data (empty dict when missing or null)."""
        return self.span_data.get("resource") or {}

    @property
    def context(self):
        """Get context from span_data (empty dict when missing or null)."""
        return self.span_data.get("context") or {}

    def duration_ms(self):
        """Return the span duration in milliseconds, or 0 if a bound is missing."""
        if self.start_time and self.end_time:
            return (self.end_time - self.start_time).total_seconds() * 1000
        return 0

    @cached_property
    def sql_query(self):
        """Get the SQL query if this span contains one."""
        return self.attributes.get(db_attributes.DB_QUERY_TEXT)

    @cached_property
    def sql_query_params(self):
        """Get query parameters from attributes under the query-parameter prefix.

        Returns:
            A dict mapping each parameter name (the attribute key with the
            ``DB_QUERY_PARAMETER_TEMPLATE + "."`` prefix removed) to its value.
        """
        prefix = DB_QUERY_PARAMETER_TEMPLATE + "."
        # removeprefix strips only the leading prefix; str.replace would also
        # strip any later occurrence inside the parameter name.
        return {
            key.removeprefix(prefix): value
            for key, value in self.attributes.items()
            if key.startswith(prefix)
        }

    def get_formatted_sql(self):
        """Get the pretty-formatted SQL query if this span contains one."""
        sql = self.sql_query
        if not sql:
            return None

        return sqlparse.format(
            sql,
            reindent=True,
            keyword_case="upper",
            identifier_case="lower",
            strip_comments=False,
            strip_whitespace=True,
            indent_width=2,
            wrap_after=80,
            comma_first=False,
        )

    def format_event_timestamp(self, timestamp):
        """Convert event timestamp to a readable datetime.

        Numeric values are treated as time since the Unix epoch, with the unit
        guessed from the magnitude: nanoseconds (>= 1e17), microseconds
        (>= 1e14), milliseconds (>= 1e11), otherwise seconds. Present-day
        values are about 1.7e18, 1.7e15, 1.7e12 and 1.7e9 respectively.

        Args:
            timestamp: An int/float epoch value, or any other object.

        Returns:
            A UTC ``datetime`` for convertible numbers, ``str(timestamp)`` for
            numbers that cannot be converted, and the input unchanged for
            non-numeric values.
        """
        if not isinstance(timestamp, int | float):
            return timestamp

        seconds = timestamp
        if timestamp >= 1e17:  # nanoseconds
            seconds = timestamp / 1e9
        elif timestamp >= 1e14:  # microseconds
            seconds = timestamp / 1e6
        elif timestamp >= 1e11:  # milliseconds
            seconds = timestamp / 1e3

        try:
            return datetime.fromtimestamp(seconds, tz=UTC)
        except (ValueError, OSError, OverflowError):
            # Out-of-range, NaN or infinite values fall back to plain text.
            return str(timestamp)

    def get_exception_stacktrace(self):
        """Get the exception stacktrace if this span has an exception event.

        Returns:
            The stacktrace of the first "exception" event that carries one,
            or ``None`` when no such event exists.
        """
        for event in self.events:
            if event.get("name") == "exception" and event.get("attributes"):
                stacktrace = event["attributes"].get(
                    exception_attributes.EXCEPTION_STACKTRACE
                )
                # Keep looking if this exception event has no stacktrace.
                if stacktrace:
                    return stacktrace
        return None
|
plain/observer/otel.py
ADDED
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import re
|
|
3
|
+
import threading
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
|
|
6
|
+
import opentelemetry.context as context_api
|
|
7
|
+
from opentelemetry import baggage, trace
|
|
8
|
+
from opentelemetry.sdk.trace import SpanProcessor, sampling
|
|
9
|
+
from opentelemetry.semconv.attributes import url_attributes
|
|
10
|
+
from opentelemetry.trace import SpanKind, format_span_id, format_trace_id
|
|
11
|
+
|
|
12
|
+
from plain.http.cookie import unsign_cookie_value
|
|
13
|
+
from plain.models.otel import suppress_db_tracing
|
|
14
|
+
from plain.runtime import settings
|
|
15
|
+
|
|
16
|
+
from .core import Observer, ObserverMode
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def get_span_processor():
    """Return the ObserverSpanProcessor registered on the tracer provider.

    Looks at the provider's ``_active_span_processor`` and, when that object
    exposes ``_span_processors``, returns the first ObserverSpanProcessor found
    there. Returns None when any link in that chain is absent.
    """
    provider = trace.get_tracer_provider()
    if not provider:
        return None

    # The provider may not expose a processor at all.
    composite = getattr(provider, "_active_span_processor", None)
    if not composite:
        return None

    # Only a composite processor carries a list of child processors.
    children = getattr(composite, "_span_processors", ())
    return next(
        (child for child in children if isinstance(child, ObserverSpanProcessor)),
        None,
    )
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def get_current_trace_summary() -> str | None:
    """Return the performance summary of the trace that is currently active.

    Returns None when there is no current span, no ObserverSpanProcessor is
    registered, or the processor has nothing recorded for the trace.
    """
    active_span = trace.get_current_span()
    if not active_span:
        return None

    observer_processor = get_span_processor()
    if not observer_processor:
        return None

    # Trace IDs are keyed as "0x" followed by the formatted hex ID.
    hex_trace_id = format_trace_id(active_span.get_span_context().trace_id)
    return observer_processor.get_trace_summary(f"0x{hex_trace_id}")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class ObserverSampler(sampling.Sampler):
    """Samples traces based on request path and cookies."""

    def __init__(self):
        # Fallback when nothing else decides: follow the parent span's
        # decision, and drop traces that have no parent.
        self._delegate = sampling.ParentBased(sampling.ALWAYS_OFF)

        # TODO ignore url namespace instead? admin, observer, assets
        self._ignore_url_paths = [
            re.compile(p) for p in settings.OBSERVER_IGNORE_URL_PATTERNS
        ]

    def should_sample(
        self,
        parent_context,
        trace_id,
        name,
        kind: SpanKind | None = None,
        attributes=None,
        links=None,
        trace_state=None,
    ):
        """Decide whether a span should be recorded.

        Decision order:
            1. Drop if the span's URL path matches an ignored pattern.
            2. Use the observer cookie found in the context baggage: record for
               the summary/persist modes, drop for any other value.
            3. Record if the span has links (overrides a cookie drop).
            4. Otherwise defer to the parent-based delegate.

        Returns:
            A ``SamplingResult`` carrying the decision, the unchanged
            ``attributes`` and the trace state to use for the new span.
        """
        # The trace state on the result is what the new span context receives,
        # so inherit the parent's when the caller did not supply one; leaving
        # it unset would drop the parent's trace state at this span.
        if trace_state is None:
            trace_state = (
                trace.get_current_span(parent_context).get_span_context().trace_state
            )

        # First, drop if the URL should be ignored.
        if attributes:
            if url_path := attributes.get(url_attributes.URL_PATH, ""):
                if any(pattern.match(url_path) for pattern in self._ignore_url_paths):
                    return sampling.SamplingResult(
                        sampling.Decision.DROP,
                        attributes=attributes,
                        trace_state=trace_state,
                    )

        # Check cookies directly for a sampling decision
        decision = None
        if parent_context:
            if cookies := baggage.get_baggage("http.request.cookies", parent_context):
                if observer_cookie := cookies.get(Observer.COOKIE_NAME):
                    unsigned_value = unsign_cookie_value(
                        Observer.COOKIE_NAME, observer_cookie, default=False
                    )

                    if unsigned_value in (
                        ObserverMode.PERSIST.value,
                        ObserverMode.SUMMARY.value,
                    ):
                        # Always use RECORD_AND_SAMPLE so ParentBased works correctly
                        # The processor will check the mode to decide whether to export
                        decision = sampling.Decision.RECORD_AND_SAMPLE
                    else:
                        decision = sampling.Decision.DROP

        # If there are links, assume it is to another trace/span that we are keeping
        if links:
            decision = sampling.Decision.RECORD_AND_SAMPLE

        # If no decision from cookies or links, use the parent-based default
        if decision is None:
            result = self._delegate.should_sample(
                parent_context,
                trace_id,
                name,
                kind=kind,
                attributes=attributes,
                links=links,
                trace_state=trace_state,
            )
            decision = result.decision

        return sampling.SamplingResult(
            decision,
            attributes=attributes,
            trace_state=trace_state,
        )

    def get_description(self) -> str:
        return "ObserverSampler"
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class ObserverSpanProcessor(SpanProcessor):
    """Collects spans in real-time for current trace performance monitoring.

    This processor keeps spans in memory for traces that have the 'summary' or 'persist'
    cookie set. These spans can be accessed via get_current_trace_summary() for
    real-time debugging. Spans with 'persist' cookie will also be persisted to the
    database.
    """

    # Upper bound on traces held in memory, and how many to evict once exceeded.
    _MAX_TRACES_IN_MEMORY = 1000
    _EVICTION_BATCH_SIZE = 100

    def __init__(self):
        # Span storage
        self._traces = defaultdict(
            lambda: {
                "trace": None,  # Trace model instance
                "active_otel_spans": {},  # span_id -> opentelemetry span
                "completed_otel_spans": [],  # list of opentelemetry spans
                "span_models": [],  # list of Span model instances
                "root_span_id": None,
                "mode": None,  # None, ObserverMode.SUMMARY.value, or ObserverMode.PERSIST.value
            }
        )
        # Guards every read and write of self._traces.
        self._traces_lock = threading.Lock()

    def on_start(self, span, parent_context=None):
        """Called when a span starts.

        Registers the span as active for its trace. A trace entry is created
        only when the first span seen for that trace has a recording mode.
        """
        trace_id = f"0x{format_trace_id(span.get_span_context().trace_id)}"

        with self._traces_lock:
            # Check if we already have this trace
            if trace_id in self._traces:
                trace_info = self._traces[trace_id]
            else:
                # First span in trace - determine if we should record it
                mode = self._get_recording_mode(span, parent_context)
                if not mode:
                    # Don't create trace entry for traces we won't record
                    return

                # Create trace entry only for traces we'll record
                trace_info = self._traces[trace_id]
                trace_info["mode"] = mode

                # Clean up old traces if too many. Dicts keep insertion order,
                # so the leading keys are the traces that were created first;
                # sorting by trace ID would pick arbitrary traces instead.
                if len(self._traces) > self._MAX_TRACES_IN_MEMORY:
                    oldest_ids = list(self._traces)[: self._EVICTION_BATCH_SIZE]
                    for old_id in oldest_ids:
                        del self._traces[old_id]

            span_id = f"0x{format_span_id(span.get_span_context().span_id)}"

            # Store span (we know mode is truthy if we get here)
            trace_info["active_otel_spans"][span_id] = span

            # Track root span
            if not span.parent:
                trace_info["root_span_id"] = span_id

    def on_end(self, span):
        """Called when a span ends.

        Moves the span to the completed list. When the root span ends, the
        trace is removed from memory and, in persist mode, written to the
        database.
        """
        trace_id = f"0x{format_trace_id(span.get_span_context().trace_id)}"
        span_id = f"0x{format_span_id(span.get_span_context().span_id)}"

        with self._traces_lock:
            # Skip if we don't have this trace (mode was None on start).
            # .get() does not trigger the defaultdict factory.
            trace_info = self._traces.get(trace_id)
            if trace_info is None:
                return

            # Move span from active to completed
            if trace_info["active_otel_spans"].pop(span_id, None) is not None:
                trace_info["completed_otel_spans"].append(span)

            # Trace is complete only once the root span has ended
            if span_id != trace_info["root_span_id"]:
                return

            # Detach the finished trace while holding the lock; the model
            # building and database writes below then run without blocking
            # span starts and ends on other threads.
            del self._traces[trace_id]

        all_spans = trace_info["completed_otel_spans"]
        if not all_spans:
            return

        from .models import Span, Trace

        trace_info["trace"] = Trace.from_opentelemetry_spans(all_spans)
        trace_info["span_models"] = [
            Span.from_opentelemetry_span(s, trace_info["trace"]) for s in all_spans
        ]

        # Export if in persist mode
        if trace_info["mode"] == ObserverMode.PERSIST.value:
            logger.debug(
                "Exporting %d spans for trace %s",
                len(trace_info["span_models"]),
                trace_id,
            )
            self._export_trace(trace_info["trace"], trace_info["span_models"])

    def get_trace_summary(self, trace_id: str) -> str | None:
        """Get performance summary for a specific trace.

        Args:
            trace_id: Trace ID in the "0x<hex>" form used as the storage key.

        Returns:
            The summary string, or None when the trace is not being recorded
            or has no spans yet.
        """
        from .models import Span, Trace

        with self._traces_lock:
            # Return None if trace doesn't exist (mode was None)
            trace_info = self._traces.get(trace_id)
            if trace_info is None:
                return None

            # Combine active and completed spans
            all_otel_spans = (
                list(trace_info["active_otel_spans"].values())
                + trace_info["completed_otel_spans"]
            )

            if not all_otel_spans:
                return None

            # Rebuild the trace on every call: the set of spans (and therefore
            # the time bounds) changes while the trace is still in progress, so
            # a Trace cached by an earlier call would report stale timing.
            trace_info["trace"] = Trace.from_opentelemetry_spans(all_otel_spans)
            span_models = [
                Span.from_opentelemetry_span(s, trace_info["trace"])
                for s in all_otel_spans
            ]

            return trace_info["trace"].get_trace_summary(span_models)

    def _export_trace(self, trace, span_models):
        """Export trace and spans to the database, then enforce the trace limit.

        Failures are logged and swallowed so that tracing never breaks the
        traced code.
        """
        from .models import Span, Trace

        # Keep the observer's own queries out of the traces it records.
        with suppress_db_tracing():
            try:
                trace.save()

                for span_model in span_models:
                    span_model.trace = trace

                # Bulk create spans
                Span.objects.bulk_create(span_models)
            except Exception as e:
                logger.warning(
                    "Failed to export trace to database: %s",
                    e,
                    exc_info=True,
                )

            # Delete oldest traces if we exceed the limit
            limit = settings.OBSERVER_TRACE_LIMIT
            if limit > 0:
                try:
                    # Remove only the overflow; deleting `limit` rows here
                    # would wipe nearly the whole table each time it fills.
                    excess = Trace.objects.count() - limit
                    if excess > 0:
                        delete_ids = list(
                            Trace.objects.order_by("start_time").values_list(
                                "id", flat=True
                            )[:excess]
                        )
                        Trace.objects.filter(id__in=delete_ids).delete()
                except Exception as e:
                    logger.warning(
                        "Failed to clean up old observer traces: %s", e, exc_info=True
                    )

    def _get_recording_mode(self, span, parent_context) -> str | None:
        """Return the recording mode for a new trace, or None to skip it.

        A span linked to an already-persisted span is persisted too. Otherwise
        the mode comes from the signed observer cookie in the context baggage.
        """
        # If the span has links, then we are going to export if the linked span is also exported
        for link in span.links:
            if link.context.is_valid and link.context.span_id:
                from .models import Span

                linked_span_id = f"0x{format_span_id(link.context.span_id)}"
                # on_start calls this while holding the non-reentrant
                # _traces_lock, so this lookup must not be traced itself: a
                # recorded span for it would re-enter on_start.
                with suppress_db_tracing():
                    linked_span_exists = Span.objects.filter(
                        span_id=linked_span_id
                    ).exists()
                if linked_span_exists:
                    return ObserverMode.PERSIST.value

        if not (context := parent_context or context_api.get_current()):
            return None

        if not (cookies := baggage.get_baggage("http.request.cookies", context)):
            return None

        if not (observer_cookie := cookies.get(Observer.COOKIE_NAME)):
            return None

        try:
            mode = unsign_cookie_value(
                Observer.COOKIE_NAME, observer_cookie, default=None
            )
            if mode in (ObserverMode.SUMMARY.value, ObserverMode.PERSIST.value):
                return mode
        except Exception as e:
            logger.warning("Failed to unsign observer cookie: %s", e)

        return None

    def shutdown(self):
        """Cleanup when shutting down."""
        with self._traces_lock:
            self._traces.clear()

    def force_flush(self, timeout_millis=None):
        """Required by SpanProcessor interface."""
        return True
|