genkit-plugin-google-cloud 0.3.2__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genkit/plugins/google_cloud/__init__.py +127 -2
- genkit/plugins/google_cloud/telemetry/__init__.py +74 -0
- genkit/plugins/google_cloud/telemetry/action.py +124 -0
- genkit/plugins/google_cloud/telemetry/engagement.py +170 -0
- genkit/plugins/google_cloud/telemetry/feature.py +186 -0
- genkit/plugins/google_cloud/telemetry/generate.py +605 -0
- genkit/plugins/google_cloud/telemetry/metrics.py +246 -0
- genkit/plugins/google_cloud/telemetry/path.py +157 -0
- genkit/plugins/google_cloud/telemetry/tracing.py +880 -29
- genkit/plugins/google_cloud/telemetry/utils.py +217 -0
- {genkit_plugin_google_cloud-0.3.2.dist-info → genkit_plugin_google_cloud-0.5.0.dist-info}/METADATA +10 -2
- genkit_plugin_google_cloud-0.5.0.dist-info/RECORD +15 -0
- {genkit_plugin_google_cloud-0.3.2.dist-info → genkit_plugin_google_cloud-0.5.0.dist-info}/WHEEL +1 -1
- genkit_plugin_google_cloud-0.3.2.dist-info/RECORD +0 -9
- /genkit/{py.typed → plugins/google_cloud/py.typed} +0 -0
- {genkit_plugin_google_cloud-0.3.2.dist-info → genkit_plugin_google_cloud-0.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -15,31 +15,480 @@
|
|
|
15
15
|
# SPDX-License-Identifier: Apache-2.0
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
"""Telemetry and tracing functionality for the Genkit
|
|
18
|
+
"""Telemetry and tracing functionality for the Genkit Google Cloud plugin.
|
|
19
19
|
|
|
20
20
|
This module provides functionality for collecting and exporting telemetry data
|
|
21
|
-
from Genkit operations. It uses OpenTelemetry for tracing and
|
|
22
|
-
data to
|
|
21
|
+
from Genkit operations to Google Cloud. It uses OpenTelemetry for tracing and
|
|
22
|
+
exports span data to Google Cloud Trace for monitoring and debugging purposes.
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
|
|
24
|
+
Architecture Overview:
|
|
25
|
+
The telemetry system follows a pipeline architecture that processes spans
|
|
26
|
+
(traces) and metrics before exporting them to Google Cloud:
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
30
|
+
│ TELEMETRY DATA FLOW │
|
|
31
|
+
│ │
|
|
32
|
+
│ Genkit Actions (flows, models, tools) │
|
|
33
|
+
│ │ │
|
|
34
|
+
│ ▼ │
|
|
35
|
+
│ ┌─────────────────┐ │
|
|
36
|
+
│ │ OpenTelemetry │ Creates spans with genkit:* attributes │
|
|
37
|
+
│ │ Tracer │ (type, name, input, output, state, path, etc.) │
|
|
38
|
+
│ └────────┬────────┘ │
|
|
39
|
+
│ │ │
|
|
40
|
+
│ ▼ │
|
|
41
|
+
│ ┌─────────────────────────────────────────────────────────────┐ │
|
|
42
|
+
│ │ GcpAdjustingTraceExporter │ │
|
|
43
|
+
│ │ ┌─────────────────────────────────────────────────────┐ │ │
|
|
44
|
+
│ │ │ 1. _tick_telemetry() │ │ │
|
|
45
|
+
│ │ │ - pathsTelemetry.tick() → Error metrics/logs │ │ │
|
|
46
|
+
│ │ │ - featuresTelemetry.tick() → Feature metrics │ │ │
|
|
47
|
+
│ │ │ - generateTelemetry.tick() → Model metrics │ │ │
|
|
48
|
+
│ │ │ - actionTelemetry.tick() → Action I/O logs │ │ │
|
|
49
|
+
│ │ │ - engagementTelemetry.tick() → Feedback metrics │ │ │
|
|
50
|
+
│ │ │ - Sets genkit:rootState for root spans │ │ │
|
|
51
|
+
│ │ └─────────────────────────────────────────────────────┘ │ │
|
|
52
|
+
│ │ ┌─────────────────────────────────────────────────────┐ │ │
|
|
53
|
+
│ │ │ 2. AdjustingTraceExporter._adjust() │ │ │
|
|
54
|
+
│ │ │ - Redact genkit:input/output → "<redacted>" │ │ │
|
|
55
|
+
│ │ │ - Mark error spans with /http/status_code: 599 │ │ │
|
|
56
|
+
│ │ │ - Mark failed spans with genkit:failedSpan │ │ │
|
|
57
|
+
│ │ │ - Mark root spans with genkit:feature │ │ │
|
|
58
|
+
│ │ │ - Mark model spans with genkit:model │ │ │
|
|
59
|
+
│ │ │ - Normalize labels (: → /) for GCP compatibility │ │ │
|
|
60
|
+
│ │ └─────────────────────────────────────────────────────┘ │ │
|
|
61
|
+
│ └────────────────────────┬────────────────────────────────────┘ │
|
|
62
|
+
│ │ │
|
|
63
|
+
│ ┌───────────────┴───────────────┐ │
|
|
64
|
+
│ ▼ ▼ │
|
|
65
|
+
│ ┌─────────────────┐ ┌─────────────────┐ │
|
|
66
|
+
│ │ GenkitGCPExporter│ │ Cloud Logging │ │
|
|
67
|
+
│ │ (Cloud Trace) │ │ (via structlog) │ │
|
|
68
|
+
│ └────────┬────────┘ └─────────────────┘ │
|
|
69
|
+
│ │ │
|
|
70
|
+
│ ▼ │
|
|
71
|
+
│ ┌─────────────────┐ │
|
|
72
|
+
│ │ Google Cloud │ │
|
|
73
|
+
│ │ Trace API │ │
|
|
74
|
+
│ └─────────────────┘ │
|
|
75
|
+
│ │
|
|
76
|
+
│ ─────────────────────── METRICS PIPELINE ──────────────────────── │
|
|
77
|
+
│ │
|
|
78
|
+
│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │
|
|
79
|
+
│ │ OpenTelemetry │───▶│ GenkitMetric │───▶│ Cloud Monitoring│ │
|
|
80
|
+
│ │ Meter │ │ Exporter │ │ API │ │
|
|
81
|
+
│ │ (counters, │ │ (adjusts start │ │ │ │
|
|
82
|
+
│ │ histograms) │ │ times for │ │ │ │
|
|
83
|
+
│ └─────────────────┘ │ DELTA→CUMUL.) │ └─────────────────┘ │
|
|
84
|
+
│ └─────────────────┘ │
|
|
85
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Key Components:
|
|
89
|
+
1. **GcpAdjustingTraceExporter**: Extends AdjustingTraceExporter to add
|
|
90
|
+
GCP-specific telemetry recording before spans are adjusted and exported.
|
|
91
|
+
|
|
92
|
+
2. **AdjustingTraceExporter** (from genkit.core.trace): Base class that
|
|
93
|
+
handles PII redaction, error marking, and label normalization.
|
|
94
|
+
|
|
95
|
+
3. **GenkitGCPExporter**: Extends CloudTraceSpanExporter with retry logic
|
|
96
|
+
for reliable delivery to Google Cloud Trace.
|
|
97
|
+
|
|
98
|
+
4. **GenkitMetricExporter**: Wraps CloudMonitoringMetricsExporter and
|
|
99
|
+
adjusts start times to prevent overlap when GCP converts DELTA to
|
|
100
|
+
CUMULATIVE aggregation.
|
|
101
|
+
|
|
102
|
+
5. **Telemetry Handlers** (in separate modules):
|
|
103
|
+
- feature.py: Tracks root span requests/latency
|
|
104
|
+
- path.py: Tracks error paths and failure metrics
|
|
105
|
+
- generate.py: Tracks model usage (tokens, latency, media)
|
|
106
|
+
- action.py: Logs tool and action I/O
|
|
107
|
+
- engagement.py: Tracks user feedback and acceptance
|
|
108
|
+
|
|
109
|
+
Telemetry Types and When They Fire:
|
|
110
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
111
|
+
│ Telemetry Type │ Condition │ What It Records │
|
|
112
|
+
├────────────────┼──────────────────────────────┼─────────────────────────┤
|
|
113
|
+
│ paths │ Always (for all spans) │ Error paths, failures │
|
|
114
|
+
│ features │ genkit:isRoot = true │ Request count, latency │
|
|
115
|
+
│ generate │ type=action, subtype=model │ Tokens, latency, media │
|
|
116
|
+
│ action │ type in (action,flow,...) │ Input/output logs │
|
|
117
|
+
│ engagement │ type=userEngagement │ Feedback, acceptance │
|
|
118
|
+
└────────────────┴──────────────────────────────┴─────────────────────────┘
|
|
119
|
+
|
|
120
|
+
Span Attributes Used:
|
|
121
|
+
The system reads these genkit:* attributes from spans:
|
|
122
|
+
- genkit:type - Span type (action, flow, flowStep, util, userEngagement)
|
|
123
|
+
- genkit:metadata:subtype - Subtype (model, tool, etc.)
|
|
124
|
+
- genkit:isRoot - Whether this is the root/entry span
|
|
125
|
+
- genkit:name - Action/flow name
|
|
126
|
+
- genkit:path - Hierarchical path like /{flow,t:flow}/{step,t:flowStep}
|
|
127
|
+
- genkit:input - JSON-encoded input data
|
|
128
|
+
- genkit:output - JSON-encoded output data
|
|
129
|
+
- genkit:state - Span state (success, error)
|
|
130
|
+
- genkit:isFailureSource - Whether this span is the source of a failure
|
|
131
|
+
|
|
132
|
+
Configuration Options (matching JS/Go parity):
|
|
133
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
134
|
+
│ Option │ Type │ Default │ Description │
|
|
135
|
+
├─────────────────────────────┼──────────┼────────────┼───────────────────┤
|
|
136
|
+
│ project_id │ str │ Auto │ GCP project ID │
|
|
137
|
+
│ credentials │ dict │ ADC │ Service account │
|
|
138
|
+
│ log_input_and_output │ bool │ False │ Disable redaction │
|
|
139
|
+
│ force_dev_export │ bool │ True │ Export in dev │
|
|
140
|
+
│ disable_metrics │ bool │ False │ Skip metrics │
|
|
141
|
+
│ disable_traces │ bool │ False │ Skip traces │
|
|
142
|
+
│ metric_export_interval_ms │ int │ 300000 │ Export interval │
|
|
143
|
+
│ metric_export_timeout_ms │ int │ None │ Export timeout │
|
|
144
|
+
│ sampler │ Sampler │ AlwaysOn │ Trace sampler │
|
|
145
|
+
└─────────────────────────────┴──────────┴────────────┴───────────────────┘
|
|
146
|
+
|
|
147
|
+
Project ID Resolution Order:
|
|
148
|
+
1. Explicit project_id parameter
|
|
149
|
+
2. FIREBASE_PROJECT_ID environment variable
|
|
150
|
+
3. GOOGLE_CLOUD_PROJECT environment variable
|
|
151
|
+
4. GCLOUD_PROJECT environment variable
|
|
152
|
+
5. project_id from credentials dict
|
|
153
|
+
|
|
154
|
+
Usage:
|
|
155
|
+
```python
|
|
156
|
+
from genkit.plugins.google_cloud import add_gcp_telemetry
|
|
157
|
+
|
|
158
|
+
# Enable telemetry with default settings (PII redaction enabled)
|
|
159
|
+
add_gcp_telemetry()
|
|
160
|
+
|
|
161
|
+
# Enable telemetry with input/output logging (disable PII redaction)
|
|
162
|
+
add_gcp_telemetry(log_input_and_output=True)
|
|
163
|
+
|
|
164
|
+
# Force export even in dev environment
|
|
165
|
+
add_gcp_telemetry(force_dev_export=True)
|
|
166
|
+
|
|
167
|
+
# Disable metrics but keep traces
|
|
168
|
+
add_gcp_telemetry(disable_metrics=True)
|
|
169
|
+
|
|
170
|
+
# Custom metric export interval (minimum 5000ms for GCP)
|
|
171
|
+
add_gcp_telemetry(metric_export_interval_ms=30000)
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
Caveats:
|
|
175
|
+
- By default, model inputs and outputs are redacted for privacy
|
|
176
|
+
- Set log_input_and_output=True only in trusted environments
|
|
177
|
+
- In dev environment, telemetry is skipped unless force_dev_export=True
|
|
178
|
+
- GCP requires minimum 5000ms metric export interval (see quotas link below)
|
|
179
|
+
|
|
180
|
+
GCP Documentation References:
|
|
181
|
+
Cloud Trace:
|
|
182
|
+
- Overview: https://cloud.google.com/trace/docs
|
|
183
|
+
- IAM Roles: https://cloud.google.com/trace/docs/iam
|
|
184
|
+
- Required role: roles/cloudtrace.agent (Cloud Trace Agent)
|
|
185
|
+
|
|
186
|
+
Cloud Monitoring:
|
|
187
|
+
- Overview: https://cloud.google.com/monitoring/docs
|
|
188
|
+
- Quotas & Limits: https://cloud.google.com/monitoring/quotas
|
|
189
|
+
- Required role: roles/monitoring.metricWriter (Monitoring Metric Writer)
|
|
190
|
+
or roles/telemetry.metricsWriter (Cloud Telemetry Metrics Writer)
|
|
191
|
+
|
|
192
|
+
OpenTelemetry GCP Exporters:
|
|
193
|
+
- Documentation: https://google-cloud-opentelemetry.readthedocs.io/
|
|
194
|
+
- Cloud Trace Exporter: https://google-cloud-opentelemetry.readthedocs.io/en/stable/cloud_trace/cloud_trace.html
|
|
195
|
+
- Cloud Monitoring Exporter: https://google-cloud-opentelemetry.readthedocs.io/en/stable/cloud_monitoring/cloud_monitoring.html
|
|
196
|
+
|
|
197
|
+
Cross-Language Parity:
|
|
198
|
+
This implementation maintains parity with:
|
|
199
|
+
- JavaScript: js/plugins/google-cloud/src/gcpOpenTelemetry.ts
|
|
200
|
+
- Go: go/plugins/googlecloud/googlecloud.go
|
|
201
|
+
- Go: go/plugins/firebase/telemetry.go (FirebaseTelemetryOptions)
|
|
202
|
+
|
|
203
|
+
Key parity points:
|
|
204
|
+
- Same configuration options with equivalent semantics
|
|
205
|
+
- Same telemetry dispatch logic (when each handler fires)
|
|
206
|
+
- Same metrics names and dimensions
|
|
207
|
+
- Same span adjustment pipeline (redaction, marking, normalization)
|
|
208
|
+
- Same project ID resolution order
|
|
26
209
|
"""
|
|
27
210
|
|
|
28
|
-
|
|
211
|
+
import logging
|
|
212
|
+
import os
|
|
213
|
+
import uuid
|
|
214
|
+
from collections.abc import Callable, Mapping, MutableMapping, Sequence
|
|
215
|
+
from typing import Any, cast
|
|
29
216
|
|
|
30
217
|
import structlog
|
|
31
218
|
from google.api_core import exceptions as core_exceptions, retry as retries
|
|
32
219
|
from google.cloud.trace_v2 import BatchWriteSpansRequest
|
|
220
|
+
from opentelemetry import metrics, trace
|
|
221
|
+
from opentelemetry.exporter.cloud_monitoring import CloudMonitoringMetricsExporter
|
|
33
222
|
from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter
|
|
34
|
-
from opentelemetry.
|
|
35
|
-
|
|
36
|
-
SpanExportResult,
|
|
223
|
+
from opentelemetry.resourcedetector.gcp_resource_detector import (
|
|
224
|
+
GoogleCloudResourceDetector,
|
|
37
225
|
)
|
|
226
|
+
from opentelemetry.sdk.metrics import (
|
|
227
|
+
Counter,
|
|
228
|
+
Histogram,
|
|
229
|
+
MeterProvider,
|
|
230
|
+
ObservableCounter,
|
|
231
|
+
ObservableGauge,
|
|
232
|
+
ObservableUpDownCounter,
|
|
233
|
+
UpDownCounter,
|
|
234
|
+
)
|
|
235
|
+
from opentelemetry.sdk.metrics.export import (
|
|
236
|
+
AggregationTemporality,
|
|
237
|
+
MetricExporter,
|
|
238
|
+
MetricExportResult,
|
|
239
|
+
MetricsData,
|
|
240
|
+
PeriodicExportingMetricReader,
|
|
241
|
+
)
|
|
242
|
+
from opentelemetry.sdk.resources import SERVICE_INSTANCE_ID, SERVICE_NAME, Resource
|
|
243
|
+
from opentelemetry.sdk.trace import ReadableSpan
|
|
244
|
+
from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
|
|
245
|
+
from opentelemetry.sdk.trace.sampling import Sampler
|
|
38
246
|
|
|
247
|
+
from genkit.core.environment import is_dev_environment
|
|
248
|
+
from genkit.core.trace.adjusting_exporter import AdjustingTraceExporter, RedactedSpan
|
|
39
249
|
from genkit.core.tracing import add_custom_exporter
|
|
40
250
|
|
|
251
|
+
from .action import action_telemetry
|
|
252
|
+
from .engagement import engagement_telemetry
|
|
253
|
+
from .feature import features_telemetry
|
|
254
|
+
from .generate import generate_telemetry
|
|
255
|
+
from .path import paths_telemetry
|
|
256
|
+
|
|
41
257
|
logger = structlog.get_logger(__name__)
|
|
42
258
|
|
|
259
|
+
# Constants matching JS/Go implementations
|
|
260
|
+
MIN_METRIC_EXPORT_INTERVAL_MS = 5000
|
|
261
|
+
DEFAULT_METRIC_EXPORT_INTERVAL_MS = 300000
|
|
262
|
+
DEV_METRIC_EXPORT_INTERVAL_MS = 5000
|
|
263
|
+
PROD_METRIC_EXPORT_INTERVAL_MS = 300000
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def _resolve_project_id(
|
|
267
|
+
project_id: str | None = None,
|
|
268
|
+
credentials: dict[str, Any] | None = None,
|
|
269
|
+
) -> str | None:
|
|
270
|
+
"""Resolve the GCP project ID from various sources.
|
|
271
|
+
|
|
272
|
+
Resolution order (matching JS/Go):
|
|
273
|
+
1. Explicit project_id parameter
|
|
274
|
+
2. FIREBASE_PROJECT_ID environment variable
|
|
275
|
+
3. GOOGLE_CLOUD_PROJECT environment variable
|
|
276
|
+
4. GCLOUD_PROJECT environment variable
|
|
277
|
+
5. Project ID from credentials
|
|
278
|
+
|
|
279
|
+
Args:
|
|
280
|
+
project_id: Explicitly provided project ID.
|
|
281
|
+
credentials: Optional credentials dict with project_id.
|
|
282
|
+
|
|
283
|
+
Returns:
|
|
284
|
+
The resolved project ID or None.
|
|
285
|
+
"""
|
|
286
|
+
if project_id:
|
|
287
|
+
return project_id
|
|
288
|
+
|
|
289
|
+
# Check environment variables in order of priority
|
|
290
|
+
for env_var in ('FIREBASE_PROJECT_ID', 'GOOGLE_CLOUD_PROJECT', 'GCLOUD_PROJECT'):
|
|
291
|
+
env_value = os.environ.get(env_var)
|
|
292
|
+
if env_value:
|
|
293
|
+
return env_value
|
|
294
|
+
|
|
295
|
+
# Check credentials for project_id
|
|
296
|
+
if credentials and 'project_id' in credentials:
|
|
297
|
+
return credentials['project_id']
|
|
298
|
+
|
|
299
|
+
return None
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
class GcpTelemetry:
|
|
303
|
+
"""Central manager for GCP Telemetry configuration.
|
|
304
|
+
|
|
305
|
+
Encapsulates configuration and manages the lifecycle of Tracing, Metrics,
|
|
306
|
+
and Logging setup, ensuring consistent state (like project_id) across all
|
|
307
|
+
telemetry components.
|
|
308
|
+
"""
|
|
309
|
+
|
|
310
|
+
def __init__(
|
|
311
|
+
self,
|
|
312
|
+
project_id: str | None = None,
|
|
313
|
+
credentials: dict[str, Any] | None = None,
|
|
314
|
+
sampler: Sampler | None = None,
|
|
315
|
+
log_input_and_output: bool = False,
|
|
316
|
+
force_dev_export: bool = True,
|
|
317
|
+
disable_metrics: bool = False,
|
|
318
|
+
disable_traces: bool = False,
|
|
319
|
+
metric_export_interval_ms: int | None = None,
|
|
320
|
+
metric_export_timeout_ms: int | None = None,
|
|
321
|
+
) -> None:
|
|
322
|
+
"""Initialize the GCP Telemetry manager.
|
|
323
|
+
|
|
324
|
+
Args:
|
|
325
|
+
project_id: GCP project ID.
|
|
326
|
+
credentials: Optional credentials dict.
|
|
327
|
+
sampler: Trace sampler.
|
|
328
|
+
log_input_and_output: If True, keep inputs/outputs in spans and logs; if False (default), they are redacted.
|
|
329
|
+
force_dev_export: If True, export telemetry even when running in the dev environment.
|
|
330
|
+
disable_metrics: If True, metrics are not exported.
|
|
331
|
+
disable_traces: If True, traces are not exported.
|
|
332
|
+
metric_export_interval_ms: Export interval in ms.
|
|
333
|
+
metric_export_timeout_ms: Export timeout in ms.
|
|
334
|
+
"""
|
|
335
|
+
self.credentials = credentials
|
|
336
|
+
self.sampler = sampler
|
|
337
|
+
self.log_input_and_output = log_input_and_output
|
|
338
|
+
self.force_dev_export = force_dev_export
|
|
339
|
+
self.disable_metrics = disable_metrics
|
|
340
|
+
self.disable_traces = disable_traces
|
|
341
|
+
|
|
342
|
+
# Resolve project ID immediately
|
|
343
|
+
self.project_id = _resolve_project_id(project_id, credentials)
|
|
344
|
+
|
|
345
|
+
# Determine metric export settings
|
|
346
|
+
is_dev = is_dev_environment()
|
|
347
|
+
|
|
348
|
+
default_interval = DEV_METRIC_EXPORT_INTERVAL_MS if is_dev else DEFAULT_METRIC_EXPORT_INTERVAL_MS
|
|
349
|
+
self.metric_export_interval_ms = metric_export_interval_ms or default_interval
|
|
350
|
+
|
|
351
|
+
if self.metric_export_interval_ms < MIN_METRIC_EXPORT_INTERVAL_MS:
|
|
352
|
+
logger.warning(
|
|
353
|
+
f'metric_export_interval_ms ({self.metric_export_interval_ms}) is below minimum '
|
|
354
|
+
f'({MIN_METRIC_EXPORT_INTERVAL_MS}), using minimum'
|
|
355
|
+
)
|
|
356
|
+
self.metric_export_interval_ms = MIN_METRIC_EXPORT_INTERVAL_MS
|
|
357
|
+
|
|
358
|
+
self.metric_export_timeout_ms = metric_export_timeout_ms or self.metric_export_interval_ms
|
|
359
|
+
|
|
360
|
+
def initialize(self) -> None:
|
|
361
|
+
"""Actuates the telemetry configuration."""
|
|
362
|
+
is_dev = is_dev_environment()
|
|
363
|
+
should_export = self.force_dev_export or not is_dev
|
|
364
|
+
|
|
365
|
+
if not should_export:
|
|
366
|
+
logger.debug('Telemetry export disabled in dev environment')
|
|
367
|
+
return
|
|
368
|
+
|
|
369
|
+
self._configure_logging()
|
|
370
|
+
self._configure_tracing()
|
|
371
|
+
self._configure_metrics()
|
|
372
|
+
|
|
373
|
+
def _configure_logging(self) -> None:
|
|
374
|
+
"""Configures structlog with trace correlation."""
|
|
375
|
+
try:
|
|
376
|
+
current_config = structlog.get_config()
|
|
377
|
+
processors = current_config.get('processors', [])
|
|
378
|
+
|
|
379
|
+
# Check if our bound method is already registered (by name or other heuristic if needed)
|
|
380
|
+
# Since methods are bound, simple equality check might fail if new instance.
|
|
381
|
+
# However, for simplicity and common usage, we'll append.
|
|
382
|
+
# A better check would be to see if any processor matches our signature/name.
|
|
383
|
+
|
|
384
|
+
# Simple deduplication: Check for function name in processors
|
|
385
|
+
if not any(getattr(p, '__name__', '') == 'inject_trace_context' for p in processors):
|
|
386
|
+
|
|
387
|
+
def inject_trace_context(
|
|
388
|
+
logger: Any, # noqa: ANN401
|
|
389
|
+
method_name: str,
|
|
390
|
+
event_dict: MutableMapping[str, Any],
|
|
391
|
+
) -> Mapping[str, Any]:
|
|
392
|
+
return self._inject_trace_context(
|
|
393
|
+
cast(logging.Logger, logger), method_name, cast(dict[str, Any], event_dict)
|
|
394
|
+
)
|
|
395
|
+
|
|
396
|
+
new_processors = list(processors)
|
|
397
|
+
new_processors.insert(max(0, len(new_processors) - 1), inject_trace_context)
|
|
398
|
+
structlog.configure(processors=new_processors)
|
|
399
|
+
logger.debug('Configured structlog for GCP trace correlation')
|
|
400
|
+
|
|
401
|
+
except Exception as e:
|
|
402
|
+
logger.warning('Failed to configure structlog for trace correlation', error=str(e))
|
|
403
|
+
|
|
404
|
+
def _configure_tracing(self) -> None:
|
|
405
|
+
if self.disable_traces:
|
|
406
|
+
return
|
|
407
|
+
|
|
408
|
+
exporter_kwargs: dict[str, Any] = {}
|
|
409
|
+
if self.project_id:
|
|
410
|
+
exporter_kwargs['project_id'] = self.project_id
|
|
411
|
+
if self.credentials:
|
|
412
|
+
exporter_kwargs['credentials'] = self.credentials
|
|
413
|
+
|
|
414
|
+
base_exporter = GenkitGCPExporter(**exporter_kwargs) if exporter_kwargs else GenkitGCPExporter()
|
|
415
|
+
|
|
416
|
+
trace_exporter = GcpAdjustingTraceExporter(
|
|
417
|
+
exporter=base_exporter,
|
|
418
|
+
log_input_and_output=self.log_input_and_output,
|
|
419
|
+
project_id=self.project_id,
|
|
420
|
+
error_handler=lambda e: _handle_tracing_error(e),
|
|
421
|
+
)
|
|
422
|
+
|
|
423
|
+
add_custom_exporter(trace_exporter, 'gcp_telemetry_server')
|
|
424
|
+
|
|
425
|
+
def _configure_metrics(self) -> None:
|
|
426
|
+
if self.disable_metrics:
|
|
427
|
+
return
|
|
428
|
+
|
|
429
|
+
try:
|
|
430
|
+
resource = Resource.create({
|
|
431
|
+
SERVICE_NAME: 'genkit',
|
|
432
|
+
SERVICE_INSTANCE_ID: str(uuid.uuid4()),
|
|
433
|
+
})
|
|
434
|
+
|
|
435
|
+
# Suppress detector warnings during GCP resource detection
|
|
436
|
+
detector_logger = logging.getLogger('opentelemetry.resourcedetector.gcp_resource_detector')
|
|
437
|
+
original_level = detector_logger.level
|
|
438
|
+
detector_logger.setLevel(logging.ERROR)
|
|
439
|
+
|
|
440
|
+
try:
|
|
441
|
+
gcp_resource = GoogleCloudResourceDetector(raise_on_error=True).detect()
|
|
442
|
+
resource = resource.merge(gcp_resource)
|
|
443
|
+
except Exception as e:
|
|
444
|
+
# On detection failure, log the exception and fall back to the default resource
|
|
445
|
+
detector_logger.warning(f'Google Cloud resource detection failed: {e}')
|
|
446
|
+
finally:
|
|
447
|
+
detector_logger.setLevel(original_level)
|
|
448
|
+
|
|
449
|
+
exporter_kwargs: dict[str, Any] = {}
|
|
450
|
+
if self.project_id:
|
|
451
|
+
exporter_kwargs['project_id'] = self.project_id
|
|
452
|
+
if self.credentials:
|
|
453
|
+
exporter_kwargs['credentials'] = self.credentials
|
|
454
|
+
|
|
455
|
+
metrics_exporter = GenkitMetricExporter(
|
|
456
|
+
exporter=CloudMonitoringMetricsExporter(**exporter_kwargs),
|
|
457
|
+
error_handler=lambda e: _handle_metric_error(e),
|
|
458
|
+
)
|
|
459
|
+
|
|
460
|
+
reader = PeriodicExportingMetricReader(
|
|
461
|
+
metrics_exporter,
|
|
462
|
+
export_interval_millis=self.metric_export_interval_ms,
|
|
463
|
+
export_timeout_millis=self.metric_export_timeout_ms,
|
|
464
|
+
)
|
|
465
|
+
|
|
466
|
+
provider = MeterProvider(metric_readers=[reader], resource=resource)
|
|
467
|
+
metrics.set_meter_provider(provider)
|
|
468
|
+
|
|
469
|
+
except Exception as e:
|
|
470
|
+
_handle_metric_error(e)
|
|
471
|
+
|
|
472
|
+
def _inject_trace_context(
|
|
473
|
+
self, logger: logging.Logger, method_name: str, event_dict: dict[str, Any]
|
|
474
|
+
) -> dict[str, Any]:
|
|
475
|
+
"""Structlog processor to inject GCP-compatible trace context."""
|
|
476
|
+
span = trace.get_current_span()
|
|
477
|
+
if span == trace.INVALID_SPAN:
|
|
478
|
+
return event_dict
|
|
479
|
+
|
|
480
|
+
ctx = span.get_span_context()
|
|
481
|
+
if not ctx.is_valid:
|
|
482
|
+
return event_dict
|
|
483
|
+
|
|
484
|
+
if self.project_id:
|
|
485
|
+
event_dict['logging.googleapis.com/trace'] = f'projects/{self.project_id}/traces/{ctx.trace_id:032x}'
|
|
486
|
+
|
|
487
|
+
event_dict['logging.googleapis.com/spanId'] = f'{ctx.span_id:016x}'
|
|
488
|
+
event_dict['logging.googleapis.com/trace_sampled'] = '1' if ctx.trace_flags.sampled else '0'
|
|
489
|
+
|
|
490
|
+
return event_dict
|
|
491
|
+
|
|
43
492
|
|
|
44
493
|
class GenkitGCPExporter(CloudTraceSpanExporter):
|
|
45
494
|
"""Exports spans to a GCP telemetry server.
|
|
@@ -82,37 +531,439 @@ class GenkitGCPExporter(CloudTraceSpanExporter):
|
|
|
82
531
|
deadline=120.0,
|
|
83
532
|
),
|
|
84
533
|
)
|
|
85
|
-
# pylint: disable=broad-except
|
|
86
534
|
except Exception as ex:
|
|
87
535
|
logger.error('Error while writing to Cloud Trace', exc_info=ex)
|
|
88
536
|
return SpanExportResult.FAILURE
|
|
89
537
|
|
|
90
538
|
return SpanExportResult.SUCCESS
|
|
91
539
|
|
|
92
|
-
|
|
93
|
-
|
|
540
|
+
|
|
541
|
+
class GenkitMetricExporter(MetricExporter):
|
|
542
|
+
"""Metric exporter wrapper that adjusts start times for GCP compatibility.
|
|
543
|
+
|
|
544
|
+
Cloud Monitoring does not support delta metrics for custom metrics and will
|
|
545
|
+
convert any DELTA aggregations to CUMULATIVE ones on export. There is implicit
|
|
546
|
+
overlap in the start/end times that the Metric reader sends -- the end_time
|
|
547
|
+
of the previous export becomes the start_time of the current export.
|
|
548
|
+
|
|
549
|
+
This wrapper adds a millisecond to start times to ensure discrete export
|
|
550
|
+
timeframes and prevent data being overwritten.
|
|
551
|
+
|
|
552
|
+
This matches the JavaScript MetricExporterWrapper in gcpOpenTelemetry.ts.
|
|
553
|
+
|
|
554
|
+
Args:
|
|
555
|
+
exporter: The underlying CloudMonitoringMetricsExporter.
|
|
556
|
+
error_handler: Optional callback for export errors.
|
|
557
|
+
"""
|
|
558
|
+
|
|
559
|
+
def __init__(
|
|
560
|
+
self,
|
|
561
|
+
exporter: CloudMonitoringMetricsExporter,
|
|
562
|
+
error_handler: Callable[[Exception], None] | None = None,
|
|
563
|
+
) -> None:
|
|
564
|
+
"""Initialize the metric exporter wrapper.
|
|
565
|
+
|
|
566
|
+
Args:
|
|
567
|
+
exporter: The underlying CloudMonitoringMetricsExporter.
|
|
568
|
+
error_handler: Optional callback for export errors.
|
|
569
|
+
"""
|
|
570
|
+
self._exporter = exporter
|
|
571
|
+
self._error_handler = error_handler
|
|
572
|
+
|
|
573
|
+
# Force DELTA temporality for all instrument types to match JS implementation.
|
|
574
|
+
delta = AggregationTemporality.DELTA
|
|
575
|
+
self._preferred_temporality = {
|
|
576
|
+
Counter: delta,
|
|
577
|
+
UpDownCounter: delta,
|
|
578
|
+
Histogram: delta,
|
|
579
|
+
ObservableCounter: delta,
|
|
580
|
+
ObservableUpDownCounter: delta,
|
|
581
|
+
ObservableGauge: delta,
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
self._preferred_aggregation = getattr(exporter, '_preferred_aggregation', None)
|
|
585
|
+
|
|
586
|
+
def export(
|
|
587
|
+
self,
|
|
588
|
+
metrics_data: MetricsData,
|
|
589
|
+
timeout_millis: float = 10_000,
|
|
590
|
+
**kwargs: object,
|
|
591
|
+
) -> MetricExportResult:
|
|
592
|
+
"""Export metrics after adjusting start times.
|
|
593
|
+
|
|
594
|
+
Modifies start times of each data point to ensure no overlap with
|
|
595
|
+
previous exports when GCP converts DELTA to CUMULATIVE.
|
|
94
596
|
|
|
95
597
|
Args:
|
|
96
|
-
|
|
598
|
+
metrics_data: The metrics data to export.
|
|
599
|
+
timeout_millis: Export timeout in milliseconds.
|
|
600
|
+
**kwargs: Additional arguments for base class compatibility.
|
|
97
601
|
|
|
98
602
|
Returns:
|
|
99
|
-
|
|
603
|
+
The export result from the wrapped exporter.
|
|
100
604
|
"""
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
605
|
+
# Modify start times before export
|
|
606
|
+
self._modify_start_times(metrics_data)
|
|
607
|
+
|
|
608
|
+
try:
|
|
609
|
+
return self._exporter.export(metrics_data, timeout_millis, **kwargs)
|
|
610
|
+
except Exception as e:
|
|
611
|
+
if self._error_handler:
|
|
612
|
+
self._error_handler(e)
|
|
613
|
+
raise
|
|
614
|
+
|
|
615
|
+
def _modify_start_times(self, metrics_data: MetricsData) -> None:
|
|
616
|
+
"""Add 1ms to start times to prevent overlap.
|
|
617
|
+
|
|
618
|
+
Args:
|
|
619
|
+
metrics_data: The metrics data to modify in-place.
|
|
620
|
+
"""
|
|
621
|
+
for resource_metrics in metrics_data.resource_metrics:
|
|
622
|
+
for scope_metrics in resource_metrics.scope_metrics:
|
|
623
|
+
for metric in scope_metrics.metrics:
|
|
624
|
+
for data_point in metric.data.data_points:
|
|
625
|
+
# Add 1 millisecond (1_000_000 nanoseconds) to start time
|
|
626
|
+
if hasattr(data_point, 'start_time_unix_nano'):
|
|
627
|
+
# Modifying frozen dataclass via workaround
|
|
628
|
+
object.__setattr__(
|
|
629
|
+
data_point,
|
|
630
|
+
'start_time_unix_nano',
|
|
631
|
+
data_point.start_time_unix_nano + 1_000_000,
|
|
632
|
+
)
|
|
633
|
+
|
|
634
|
+
def force_flush(self, timeout_millis: float = 10_000) -> bool:
|
|
635
|
+
"""Delegate force flush to wrapped exporter.
|
|
636
|
+
|
|
637
|
+
Args:
|
|
638
|
+
timeout_millis: Timeout in milliseconds.
|
|
639
|
+
|
|
640
|
+
Returns:
|
|
641
|
+
True if flush succeeded.
|
|
642
|
+
"""
|
|
643
|
+
if hasattr(self._exporter, 'force_flush'):
|
|
644
|
+
return self._exporter.force_flush(timeout_millis)
|
|
645
|
+
return True
|
|
646
|
+
|
|
647
|
+
def shutdown(self, timeout_millis: float = 30_000, **kwargs: object) -> None:
|
|
648
|
+
"""Delegate shutdown to wrapped exporter.
|
|
649
|
+
|
|
650
|
+
Args:
|
|
651
|
+
timeout_millis: Timeout in milliseconds.
|
|
652
|
+
**kwargs: Additional arguments for base class compatibility.
|
|
653
|
+
"""
|
|
654
|
+
self._exporter.shutdown(timeout_millis, **kwargs)
|
|
655
|
+
|
|
656
|
+
|
|
657
|
+
class TimeAdjustedSpan(RedactedSpan):
|
|
658
|
+
"""Wraps a span to ensure non-zero duration for GCP.
|
|
659
|
+
|
|
660
|
+
GCP Trace requires end_time > start_time.
|
|
661
|
+
"""
|
|
662
|
+
|
|
663
|
+
@property
|
|
664
|
+
def end_time(self) -> int | None:
|
|
665
|
+
"""Return the span end time, adjusted to be > start_time."""
|
|
666
|
+
start = self._span.start_time
|
|
667
|
+
end = self._span.end_time
|
|
668
|
+
|
|
669
|
+
# GCP requires end_time > start_time.
|
|
670
|
+
# If the span is unfinished (end_time is None) or has zero duration,
|
|
671
|
+
# we provide a minimum 1 microsecond duration.
|
|
672
|
+
if start is not None:
|
|
673
|
+
if end is None or end <= start:
|
|
674
|
+
return start + 1000
|
|
675
|
+
|
|
676
|
+
return end
|
|
677
|
+
|
|
678
|
+
|
|
679
|
+
class GcpAdjustingTraceExporter(AdjustingTraceExporter):
    """Span exporter that records GCP telemetry on top of the base adjustments.

    Every exported span is first passed to the GCP telemetry handlers
    (metrics and logs), then through the standard AdjustingTraceExporter
    adjustments, mirroring the JavaScript implementation in
    gcpOpenTelemetry.ts.

    The telemetry handlers record:
    - Feature metrics (requests, latency) for root spans
    - Path metrics for failure tracking
    - Generate metrics (tokens, latency) for model actions
    - Action logs for tools and generate
    - Engagement metrics for user feedback

    Example:
        ```python
        exporter = GcpAdjustingTraceExporter(
            exporter=GenkitGCPExporter(),
            log_input_and_output=False,
            project_id='my-project',
        )
        ```
    """

    def __init__(
        self,
        exporter: SpanExporter,
        log_input_and_output: bool = False,
        project_id: str | None = None,
        error_handler: Callable[[Exception], None] | None = None,
    ) -> None:
        """Create the exporter around an underlying SpanExporter.

        Args:
            exporter: The SpanExporter that receives the adjusted spans.
            log_input_and_output: When True, input/output is kept in spans
                and logs. Defaults to False (redact for privacy).
            project_id: Optional GCP project ID used for log correlation.
            error_handler: Optional callback invoked when export errors occur.
        """
        super().__init__(
            exporter=exporter,
            log_input_and_output=log_input_and_output,
            project_id=project_id,
            error_handler=error_handler,
        )
        # Kept on this instance as well because _tick_telemetry forwards
        # both values to every telemetry handler.
        self._log_input_and_output = log_input_and_output
        self._project_id = project_id

    def _adjust(self, span: ReadableSpan) -> ReadableSpan:
        """Run telemetry recording, base adjustments, then the GCP time fix.

        Args:
            span: The span to adjust.

        Returns:
            The adjusted span, wrapped so that end_time > start_time.
        """
        # Telemetry goes first so the handlers see the original attributes.
        ticked = self._tick_telemetry(span)

        # Standard adjustments inherited from the base class.
        adjusted = super()._adjust(ticked)

        # GCP requires end > start; the wrapper enforces that on read.
        attributes = dict(adjusted.attributes) if adjusted.attributes else {}
        return TimeAdjustedSpan(adjusted, attributes)

    def _tick_telemetry(self, span: ReadableSpan) -> ReadableSpan:
        """Dispatch a span to the telemetry handlers that apply to its type.

        Counterpart of the JavaScript tickTelemetry method in
        gcpOpenTelemetry.ts. Failures inside the handlers are logged as a
        warning and never propagate to the export path.

        Args:
            span: The span to record telemetry for.

        Returns:
            The span; root spans carrying genkit:state come back wrapped
            with an additional genkit:rootState attribute.
        """
        attributes = span.attributes or {}
        if 'genkit:type' not in attributes:
            # Spans without a Genkit type are left alone.
            return span

        kind = str(attributes.get('genkit:type', ''))
        sub_kind = str(attributes.get('genkit:metadata:subtype', ''))
        root = bool(attributes.get('genkit:isRoot'))

        try:
            tick_args = (self._log_input_and_output, self._project_id)

            # Path telemetry is recorded for every typed span (error tracking).
            paths_telemetry.tick(span, *tick_args)

            if not root:
                if kind == 'action' and sub_kind == 'model':
                    # Generate metrics for all model actions.
                    generate_telemetry.tick(span, *tick_args)

                # TODO(#4359): Report input and output for tool actions
                # (kind == 'action' and sub_kind == 'tool'), matching the JS comment.

                if kind in ('action', 'flow', 'flowStep', 'util'):
                    # Request and latency metrics for all actions.
                    action_telemetry.tick(span, *tick_args)
            else:
                # Top-level feature request and latency, root spans only.
                features_telemetry.tick(span, *tick_args)

                # Copy the state into genkit:rootState
                # (JS: span.attributes['genkit:rootState'] = span.attributes['genkit:state']).
                root_state = attributes.get('genkit:state')
                if root_state:
                    span = RedactedSpan(span, {**attributes, 'genkit:rootState': root_state})

            if kind == 'userEngagement':
                # User acceptance and feedback metrics.
                engagement_telemetry.tick(span, *tick_args)

        except Exception as exc:
            logger.warning('Error recording telemetry', error=str(exc))

        return span
|
|
805
|
+
|
|
806
|
+
|
|
807
|
+
def add_gcp_telemetry(
    project_id: str | None = None,
    credentials: dict[str, Any] | None = None,
    sampler: Sampler | None = None,
    log_input_and_output: bool = False,
    force_dev_export: bool = True,
    disable_metrics: bool = False,
    disable_traces: bool = False,
    metric_export_interval_ms: int | None = None,
    metric_export_timeout_ms: int | None = None,
    # Legacy parameter name for backwards compatibility
    force_export: bool | None = None,
) -> None:
    """Set up export of traces and metrics to Google Cloud.

    Builds a GcpTelemetry manager from the given options and initializes it,
    which configures OpenTelemetry export to Google Cloud Trace and Cloud
    Monitoring. Model inputs and outputs are redacted by default for privacy
    protection.

    The options mirror the JavaScript (GcpTelemetryConfigOptions) and Go
    (FirebaseTelemetryOptions/GoogleCloudTelemetryOptions) implementations.

    Args:
        project_id: Google Cloud project ID. When given it takes precedence
            over environment variables and credentials. Required when using
            external credentials (e.g., Workload Identity Federation).
        credentials: Service account credentials dict used to authenticate
            with Google Cloud. Mainly for use outside of GCP; on GCP,
            credentials are typically inferred via Application Default
            Credentials (ADC).
        sampler: OpenTelemetry trace sampler deciding which traces are
            collected and exported. Defaults to AlwaysOnSampler. Common
            options:
            - AlwaysOnSampler: Collect all traces
            - AlwaysOffSampler: Collect no traces
            - TraceIdRatioBasedSampler: Sample a percentage of traces
        log_input_and_output: If True, keep model input/output in traces and
            logs. Defaults to False (redact for privacy). Enable only in
            trusted environments where PII exposure is acceptable.
            Maps to JS: !disableLoggingInputAndOutput
        force_dev_export: If True, export telemetry even in the dev
            environment. Defaults to True. Set to False for production-only
            telemetry.
            Maps to JS: forceDevExport
        disable_metrics: If True, metrics are not exported. Traces and logs
            may still be exported. Defaults to False.
            Maps to JS/Go: disableMetrics
        disable_traces: If True, traces are not exported. Metrics and logs
            may still be exported. Defaults to False.
            Maps to JS/Go: disableTraces
        metric_export_interval_ms: Metrics export interval in milliseconds.
            GCP requires a minimum of 5000ms. Defaults to 60000ms. (JS/Go
            default to 5000ms in dev and 300000ms in production; 60000ms is
            used here for consistent behavior.)
            Maps to JS/Go: metricExportIntervalMillis
        metric_export_timeout_ms: Timeout for metrics export in
            milliseconds. Defaults to the export interval if not specified.
            Maps to JS/Go: metricExportTimeoutMillis
        force_export: Deprecated. Use force_dev_export instead. When not
            None it overrides force_dev_export and a warning is logged.

    Example:
        ```python
        # Default: PII redaction enabled
        add_gcp_telemetry()

        # Enable input/output logging (disable PII redaction)
        add_gcp_telemetry(log_input_and_output=True)

        # Force export in dev environment with specific project
        add_gcp_telemetry(force_dev_export=True, project_id='my-project')

        # Disable metrics but keep traces
        add_gcp_telemetry(disable_metrics=True)

        # Custom metric export interval (minimum 5000ms)
        add_gcp_telemetry(metric_export_interval_ms=30000)

        # With custom credentials for non-GCP environments
        add_gcp_telemetry(
            project_id='my-project',
            credentials={'type': 'service_account', ...},
        )
        ```

    See Also:
        - JS: js/plugins/google-cloud/src/types.ts (GcpTelemetryConfigOptions)
        - Go: go/plugins/firebase/telemetry.go (FirebaseTelemetryOptions)
        - Go: go/plugins/googlecloud/types.go (GoogleCloudTelemetryOptions)
    """
    # The legacy flag, when supplied, wins over force_dev_export.
    legacy_flag_given = force_export is not None
    if legacy_flag_given:
        logger.warning('force_export is deprecated, use force_dev_export instead')
    effective_force_dev_export = force_export if legacy_flag_given else force_dev_export

    telemetry = GcpTelemetry(
        project_id=project_id,
        credentials=credentials,
        sampler=sampler,
        log_input_and_output=log_input_and_output,
        force_dev_export=effective_force_dev_export,
        disable_metrics=disable_metrics,
        disable_traces=disable_traces,
        metric_export_interval_ms=metric_export_interval_ms,
        metric_export_timeout_ms=metric_export_timeout_ms,
    )
    telemetry.initialize()
|
|
915
|
+
|
|
916
|
+
|
|
917
|
+
# Error handling helpers (matches JS getErrorHandler pattern).
# Module-level once-only flags: each is flipped to True by its handler after
# the detailed permission instructions have been logged.
_tracing_error_logged = _metrics_error_logged = False
|
|
920
|
+
|
|
921
|
+
|
|
922
|
+
def _handle_tracing_error(error: Exception) -> None:
    """Handle trace export errors with helpful messages.

    Permission-related failures (the error text mentions "permission",
    "denied" or "403") get a detailed message naming the required IAM role;
    that message is logged only once per process to avoid spam. Every other
    export error is always logged, even after the permission instructions
    have been emitted, so unrelated failures are never silently dropped.

    Args:
        error: The export error.
    """
    global _tracing_error_logged

    error_str = str(error).lower()
    is_permission_error = 'permission' in error_str or 'denied' in error_str or '403' in error_str

    if not is_permission_error:
        # Unrelated failures must stay visible regardless of the once-only flag.
        logger.error('Error exporting traces to GCP', error=str(error))
        return

    if _tracing_error_logged:
        # The instructions were already logged; suppress repeats only.
        return

    _tracing_error_logged = True
    logger.error(
        'Unable to send traces to Google Cloud. '
        'Ensure the service account has the "Cloud Trace Agent" (roles/cloudtrace.agent) role. '
        f'Error: {error}'
    )
|
|
944
|
+
|
|
945
|
+
|
|
946
|
+
def _handle_metric_error(error: Exception) -> None:
    """Handle metrics export errors with helpful messages.

    Permission-related failures (the error text mentions "permission",
    "denied" or "403") get a detailed message naming the required IAM roles;
    that message is logged only once per process to avoid spam. Every other
    export error is always logged, even after the permission instructions
    have been emitted, so unrelated failures are never silently dropped.

    Args:
        error: The export error.
    """
    global _metrics_error_logged

    error_str = str(error).lower()
    is_permission_error = 'permission' in error_str or 'denied' in error_str or '403' in error_str

    if not is_permission_error:
        # Unrelated failures must stay visible regardless of the once-only flag.
        logger.error('Error exporting metrics to GCP', error=str(error))
        return

    if _metrics_error_logged:
        # The instructions were already logged; suppress repeats only.
        return

    _metrics_error_logged = True
    logger.error(
        'Unable to send metrics to Google Cloud. '
        'Ensure the service account has the "Monitoring Metric Writer" '
        '(roles/monitoring.metricWriter) or "Cloud Telemetry Metrics Writer" '
        '(roles/telemetry.metricsWriter) role. '
        f'Error: {error}'
    )
|