genkit-plugin-google-cloud 0.3.2__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,31 +15,480 @@
15
15
  # SPDX-License-Identifier: Apache-2.0
16
16
 
17
17
 
18
- """Telemetry and tracing functionality for the Genkit framework.
18
+ """Telemetry and tracing functionality for the Genkit Google Cloud plugin.
19
19
 
20
20
  This module provides functionality for collecting and exporting telemetry data
21
- from Genkit operations. It uses OpenTelemetry for tracing and exports span
22
- data to a telemetry GCP server for monitoring and debugging purposes.
21
+ from Genkit operations to Google Cloud. It uses OpenTelemetry for tracing and
22
+ exports span data to Google Cloud Trace for monitoring and debugging purposes.
23
23
 
24
- The module includes:
25
- - A custom span exporter for sending trace data to a telemetry GCP server
24
+ Architecture Overview:
25
+ The telemetry system follows a pipeline architecture that processes spans
26
+ (traces) and metrics before exporting them to Google Cloud:
27
+
28
+ ```
29
+ ┌─────────────────────────────────────────────────────────────────────────┐
30
+ │ TELEMETRY DATA FLOW │
31
+ │ │
32
+ │ Genkit Actions (flows, models, tools) │
33
+ │ │ │
34
+ │ ▼ │
35
+ │ ┌─────────────────┐ │
36
+ │ │ OpenTelemetry │ Creates spans with genkit:* attributes │
37
+ │ │ Tracer │ (type, name, input, output, state, path, etc.) │
38
+ │ └────────┬────────┘ │
39
+ │ │ │
40
+ │ ▼ │
41
+ │ ┌─────────────────────────────────────────────────────────────┐ │
42
+ │ │ GcpAdjustingTraceExporter │ │
43
+ │ │ ┌─────────────────────────────────────────────────────┐ │ │
44
+ │ │ │ 1. _tick_telemetry() │ │ │
45
+ │ │ │ - pathsTelemetry.tick() → Error metrics/logs │ │ │
46
+ │ │ │ - featuresTelemetry.tick() → Feature metrics │ │ │
47
+ │ │ │ - generateTelemetry.tick() → Model metrics │ │ │
48
+ │ │ │ - actionTelemetry.tick() → Action I/O logs │ │ │
49
+ │ │ │ - engagementTelemetry.tick() → Feedback metrics │ │ │
50
+ │ │ │ - Sets genkit:rootState for root spans │ │ │
51
+ │ │ └─────────────────────────────────────────────────────┘ │ │
52
+ │ │ ┌─────────────────────────────────────────────────────┐ │ │
53
+ │ │ │ 2. AdjustingTraceExporter._adjust() │ │ │
54
+ │ │ │ - Redact genkit:input/output → "<redacted>" │ │ │
55
+ │ │ │ - Mark error spans with /http/status_code: 599 │ │ │
56
+ │ │ │ - Mark failed spans with genkit:failedSpan │ │ │
57
+ │ │ │ - Mark root spans with genkit:feature │ │ │
58
+ │ │ │ - Mark model spans with genkit:model │ │ │
59
+ │ │ │ - Normalize labels (: → /) for GCP compatibility │ │ │
60
+ │ │ └─────────────────────────────────────────────────────┘ │ │
61
+ │ └────────────────────────┬────────────────────────────────────┘ │
62
+ │ │ │
63
+ │ ┌───────────────┴───────────────┐ │
64
+ │ ▼ ▼ │
65
+ │ ┌─────────────────┐ ┌─────────────────┐ │
66
+ │ │ GenkitGCPExporter│ │ Cloud Logging │ │
67
+ │ │ (Cloud Trace) │ │ (via structlog) │ │
68
+ │ └────────┬────────┘ └─────────────────┘ │
69
+ │ │ │
70
+ │ ▼ │
71
+ │ ┌─────────────────┐ │
72
+ │ │ Google Cloud │ │
73
+ │ │ Trace API │ │
74
+ │ └─────────────────┘ │
75
+ │ │
76
+ │ ─────────────────────── METRICS PIPELINE ──────────────────────── │
77
+ │ │
78
+ │ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │
79
+ │ │ OpenTelemetry │───▶│ GenkitMetric │───▶│ Cloud Monitoring│ │
80
+ │ │ Meter │ │ Exporter │ │ API │ │
81
+ │ │ (counters, │ │ (adjusts start │ │ │ │
82
+ │ │ histograms) │ │ times for │ │ │ │
83
+ │ └─────────────────┘ │ DELTA→CUMUL.) │ └─────────────────┘ │
84
+ │ └─────────────────┘ │
85
+ └─────────────────────────────────────────────────────────────────────────┘
86
+ ```
87
+
88
+ Key Components:
89
+ 1. **GcpAdjustingTraceExporter**: Extends AdjustingTraceExporter to add
90
+ GCP-specific telemetry recording before spans are adjusted and exported.
91
+
92
+ 2. **AdjustingTraceExporter** (from genkit.core.trace): Base class that
93
+ handles PII redaction, error marking, and label normalization.
94
+
95
+ 3. **GenkitGCPExporter**: Extends CloudTraceSpanExporter with retry logic
96
+ for reliable delivery to Google Cloud Trace.
97
+
98
+ 4. **GenkitMetricExporter**: Wraps CloudMonitoringMetricsExporter and
99
+ adjusts start times to prevent overlap when GCP converts DELTA to
100
+ CUMULATIVE aggregation.
101
+
102
+ 5. **Telemetry Handlers** (in separate modules):
103
+ - feature.py: Tracks root span requests/latency
104
+ - path.py: Tracks error paths and failure metrics
105
+ - generate.py: Tracks model usage (tokens, latency, media)
106
+ - action.py: Logs tool and action I/O
107
+ - engagement.py: Tracks user feedback and acceptance
108
+
109
+ Telemetry Types and When They Fire:
110
+ ┌─────────────────────────────────────────────────────────────────────────┐
111
+ │ Telemetry Type │ Condition │ What It Records │
112
+ ├────────────────┼──────────────────────────────┼─────────────────────────┤
113
+ │ paths │ Always (for all spans) │ Error paths, failures │
114
+ │ features │ genkit:isRoot = true │ Request count, latency │
115
+ │ generate │ type=action, subtype=model │ Tokens, latency, media │
116
+ │ action │ type in (action,flow,...) │ Input/output logs │
117
+ │ engagement │ type=userEngagement │ Feedback, acceptance │
118
+ └────────────────┴──────────────────────────────┴─────────────────────────┘
119
+
120
+ Span Attributes Used:
121
+ The system reads these genkit:* attributes from spans:
122
+ - genkit:type - Span type (action, flow, flowStep, util, userEngagement)
123
+ - genkit:metadata:subtype - Subtype (model, tool, etc.)
124
+ - genkit:isRoot - Whether this is the root/entry span
125
+ - genkit:name - Action/flow name
126
+ - genkit:path - Hierarchical path like /{flow,t:flow}/{step,t:flowStep}
127
+ - genkit:input - JSON-encoded input data
128
+ - genkit:output - JSON-encoded output data
129
+ - genkit:state - Span state (success, error)
130
+ - genkit:isFailureSource - Whether this span is the source of a failure
131
+
132
+ Configuration Options (matching JS/Go parity):
133
+ ┌─────────────────────────────────────────────────────────────────────────┐
134
+ │ Option │ Type │ Default │ Description │
135
+ ├─────────────────────────────┼──────────┼────────────┼───────────────────┤
136
+ │ project_id │ str │ Auto │ GCP project ID │
137
+ │ credentials │ dict │ ADC │ Service account │
138
+ │ log_input_and_output │ bool │ False │ Disable redaction │
139
+ │ force_dev_export │ bool │ True │ Export in dev │
140
+ │ disable_metrics │ bool │ False │ Skip metrics │
141
+ │ disable_traces │ bool │ False │ Skip traces │
142
+ │ metric_export_interval_ms │ int │ 300000 │ Export interval (5000 in dev) │
143
+ │ metric_export_timeout_ms │ int │ None │ Export timeout │
144
+ │ sampler │ Sampler │ AlwaysOn │ Trace sampler │
145
+ └─────────────────────────────┴──────────┴────────────┴───────────────────┘
146
+
147
+ Project ID Resolution Order:
148
+ 1. Explicit project_id parameter
149
+ 2. FIREBASE_PROJECT_ID environment variable
150
+ 3. GOOGLE_CLOUD_PROJECT environment variable
151
+ 4. GCLOUD_PROJECT environment variable
152
+ 5. project_id from credentials dict
153
+
154
+ Usage:
155
+ ```python
156
+ from genkit.plugins.google_cloud import add_gcp_telemetry
157
+
158
+ # Enable telemetry with default settings (PII redaction enabled)
159
+ add_gcp_telemetry()
160
+
161
+ # Enable telemetry with input/output logging (disable PII redaction)
162
+ add_gcp_telemetry(log_input_and_output=True)
163
+
164
+ # Force export even in dev environment (this is already the default)
165
+ add_gcp_telemetry(force_dev_export=True)
166
+
167
+ # Disable metrics but keep traces
168
+ add_gcp_telemetry(disable_metrics=True)
169
+
170
+ # Custom metric export interval (minimum 5000ms for GCP)
171
+ add_gcp_telemetry(metric_export_interval_ms=30000)
172
+ ```
173
+
174
+ Caveats:
175
+ - By default, model inputs and outputs are redacted for privacy
176
+ - Set log_input_and_output=True only in trusted environments
177
+ - In dev environment, telemetry is exported by default (force_dev_export=True); pass force_dev_export=False to skip it
178
+ - GCP requires minimum 5000ms metric export interval (see quotas link below)
179
+
180
+ GCP Documentation References:
181
+ Cloud Trace:
182
+ - Overview: https://cloud.google.com/trace/docs
183
+ - IAM Roles: https://cloud.google.com/trace/docs/iam
184
+ - Required role: roles/cloudtrace.agent (Cloud Trace Agent)
185
+
186
+ Cloud Monitoring:
187
+ - Overview: https://cloud.google.com/monitoring/docs
188
+ - Quotas & Limits: https://cloud.google.com/monitoring/quotas
189
+ - Required role: roles/monitoring.metricWriter (Monitoring Metric Writer)
190
+ or roles/telemetry.metricsWriter (Cloud Telemetry Metrics Writer)
191
+
192
+ OpenTelemetry GCP Exporters:
193
+ - Documentation: https://google-cloud-opentelemetry.readthedocs.io/
194
+ - Cloud Trace Exporter: https://google-cloud-opentelemetry.readthedocs.io/en/stable/cloud_trace/cloud_trace.html
195
+ - Cloud Monitoring Exporter: https://google-cloud-opentelemetry.readthedocs.io/en/stable/cloud_monitoring/cloud_monitoring.html
196
+
197
+ Cross-Language Parity:
198
+ This implementation maintains parity with:
199
+ - JavaScript: js/plugins/google-cloud/src/gcpOpenTelemetry.ts
200
+ - Go: go/plugins/googlecloud/googlecloud.go
201
+ - Go: go/plugins/firebase/telemetry.go (FirebaseTelemetryOptions)
202
+
203
+ Key parity points:
204
+ - Same configuration options with equivalent semantics
205
+ - Same telemetry dispatch logic (when each handler fires)
206
+ - Same metrics names and dimensions
207
+ - Same span adjustment pipeline (redaction, marking, normalization)
208
+ - Same project ID resolution order
26
209
  """
27
210
 
28
- from collections.abc import Sequence
211
+ import logging
212
+ import os
213
+ import uuid
214
+ from collections.abc import Callable, Mapping, MutableMapping, Sequence
215
+ from typing import Any, cast
29
216
 
30
217
  import structlog
31
218
  from google.api_core import exceptions as core_exceptions, retry as retries
32
219
  from google.cloud.trace_v2 import BatchWriteSpansRequest
220
+ from opentelemetry import metrics, trace
221
+ from opentelemetry.exporter.cloud_monitoring import CloudMonitoringMetricsExporter
33
222
  from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter
34
- from opentelemetry.sdk.trace import ReadableSpan
35
- from opentelemetry.sdk.trace.export import (
36
- SpanExportResult,
223
+ from opentelemetry.resourcedetector.gcp_resource_detector import (
224
+ GoogleCloudResourceDetector,
37
225
  )
226
+ from opentelemetry.sdk.metrics import (
227
+ Counter,
228
+ Histogram,
229
+ MeterProvider,
230
+ ObservableCounter,
231
+ ObservableGauge,
232
+ ObservableUpDownCounter,
233
+ UpDownCounter,
234
+ )
235
+ from opentelemetry.sdk.metrics.export import (
236
+ AggregationTemporality,
237
+ MetricExporter,
238
+ MetricExportResult,
239
+ MetricsData,
240
+ PeriodicExportingMetricReader,
241
+ )
242
+ from opentelemetry.sdk.resources import SERVICE_INSTANCE_ID, SERVICE_NAME, Resource
243
+ from opentelemetry.sdk.trace import ReadableSpan
244
+ from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
245
+ from opentelemetry.sdk.trace.sampling import Sampler
38
246
 
247
+ from genkit.core.environment import is_dev_environment
248
+ from genkit.core.trace.adjusting_exporter import AdjustingTraceExporter, RedactedSpan
39
249
  from genkit.core.tracing import add_custom_exporter
40
250
 
251
+ from .action import action_telemetry
252
+ from .engagement import engagement_telemetry
253
+ from .feature import features_telemetry
254
+ from .generate import generate_telemetry
255
+ from .path import paths_telemetry
256
+
41
257
  logger = structlog.get_logger(__name__)
42
258
 
259
+ # Constants matching JS/Go implementations
260
+ MIN_METRIC_EXPORT_INTERVAL_MS = 5000
261
+ DEFAULT_METRIC_EXPORT_INTERVAL_MS = 300000
262
+ DEV_METRIC_EXPORT_INTERVAL_MS = 5000
263
+ PROD_METRIC_EXPORT_INTERVAL_MS = 300000
264
+
265
+
266
+ def _resolve_project_id(
267
+ project_id: str | None = None,
268
+ credentials: dict[str, Any] | None = None,
269
+ ) -> str | None:
270
+ """Resolve the GCP project ID from various sources.
271
+
272
+ Resolution order (matching JS/Go):
273
+ 1. Explicit project_id parameter
274
+ 2. FIREBASE_PROJECT_ID environment variable
275
+ 3. GOOGLE_CLOUD_PROJECT environment variable
276
+ 4. GCLOUD_PROJECT environment variable
277
+ 5. Project ID from credentials
278
+
279
+ Args:
280
+ project_id: Explicitly provided project ID.
281
+ credentials: Optional credentials dict with project_id.
282
+
283
+ Returns:
284
+ The resolved project ID or None.
285
+ """
286
+ if project_id:
287
+ return project_id
288
+
289
+ # Check environment variables in order of priority
290
+ for env_var in ('FIREBASE_PROJECT_ID', 'GOOGLE_CLOUD_PROJECT', 'GCLOUD_PROJECT'):
291
+ env_value = os.environ.get(env_var)
292
+ if env_value:
293
+ return env_value
294
+
295
+ # Check credentials for project_id
296
+ if credentials and 'project_id' in credentials:
297
+ return credentials['project_id']
298
+
299
+ return None
300
+
301
+
302
+ class GcpTelemetry:
303
+ """Central manager for GCP Telemetry configuration.
304
+
305
+ Encapsulates configuration and manages the lifecycle of Tracing, Metrics,
306
+ and Logging setup, ensuring consistent state (like project_id) across all
307
+ telemetry components.
308
+ """
309
+
310
+ def __init__(
311
+ self,
312
+ project_id: str | None = None,
313
+ credentials: dict[str, Any] | None = None,
314
+ sampler: Sampler | None = None,
315
+ log_input_and_output: bool = False,
316
+ force_dev_export: bool = True,
317
+ disable_metrics: bool = False,
318
+ disable_traces: bool = False,
319
+ metric_export_interval_ms: int | None = None,
320
+ metric_export_timeout_ms: int | None = None,
321
+ ) -> None:
322
+ """Initialize the GCP Telemetry manager.
323
+
324
+ Args:
325
+ project_id: GCP project ID.
326
+ credentials: Optional credentials dict.
327
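+ sampler: Trace sampler. Stored on the instance; NOTE(review): not referenced by the tracing setup in this class - confirm where it is applied.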
328
+ log_input_and_output: If False, hides sensitive data.
329
+ force_dev_export: If True, export telemetry even in the dev environment.
330
+ disable_metrics: If True, metrics are not exported.
331
+ disable_traces: If True, traces are not exported.
332
+ metric_export_interval_ms: Export interval in ms.
333
+ metric_export_timeout_ms: Export timeout in ms.
334
+ """
335
+ self.credentials = credentials
336
+ self.sampler = sampler
337
+ self.log_input_and_output = log_input_and_output
338
+ self.force_dev_export = force_dev_export
339
+ self.disable_metrics = disable_metrics
340
+ self.disable_traces = disable_traces
341
+
342
+ # Resolve project ID immediately
343
+ self.project_id = _resolve_project_id(project_id, credentials)
344
+
345
+ # Determine metric export settings
346
+ is_dev = is_dev_environment()
347
+
348
+ default_interval = DEV_METRIC_EXPORT_INTERVAL_MS if is_dev else DEFAULT_METRIC_EXPORT_INTERVAL_MS
349
+ self.metric_export_interval_ms = metric_export_interval_ms or default_interval
350
+
351
+ if self.metric_export_interval_ms < MIN_METRIC_EXPORT_INTERVAL_MS:
352
+ logger.warning(
353
+ f'metric_export_interval_ms ({self.metric_export_interval_ms}) is below minimum '
354
+ f'({MIN_METRIC_EXPORT_INTERVAL_MS}), using minimum'
355
+ )
356
+ self.metric_export_interval_ms = MIN_METRIC_EXPORT_INTERVAL_MS
357
+
358
+ self.metric_export_timeout_ms = metric_export_timeout_ms or self.metric_export_interval_ms
359
+
360
+ def initialize(self) -> None:
361
+ """Apply the configuration: set up logging, tracing, and metrics (no-op in dev unless force_dev_export is set)."""
362
+ is_dev = is_dev_environment()
363
+ should_export = self.force_dev_export or not is_dev
364
+
365
+ if not should_export:
366
+ logger.debug('Telemetry export disabled in dev environment')
367
+ return
368
+
369
+ self._configure_logging()
370
+ self._configure_tracing()
371
+ self._configure_metrics()
372
+
373
+ def _configure_logging(self) -> None:
374
+ """Configures structlog with trace correlation."""
375
+ try:
376
+ current_config = structlog.get_config()
377
+ processors = current_config.get('processors', [])
378
+
379
+ # Check if our bound method is already registered (by name or other heuristic if needed)
380
+ # Since methods are bound, simple equality check might fail if new instance.
381
+ # However, for simplicity and common usage, we deduplicate by function name and insert before the last processor.
382
+ # A better check would be to see if any processor matches our signature/name.
383
+
384
+ # Simple deduplication: Check for function name in processors
385
+ if not any(getattr(p, '__name__', '') == 'inject_trace_context' for p in processors):
386
+
387
+ def inject_trace_context(
388
+ logger: Any, # noqa: ANN401
389
+ method_name: str,
390
+ event_dict: MutableMapping[str, Any],
391
+ ) -> Mapping[str, Any]:
392
+ return self._inject_trace_context(
393
+ cast(logging.Logger, logger), method_name, cast(dict[str, Any], event_dict)
394
+ )
395
+
396
+ new_processors = list(processors)
397
+ new_processors.insert(max(0, len(new_processors) - 1), inject_trace_context)
398
+ structlog.configure(processors=new_processors)
399
+ logger.debug('Configured structlog for GCP trace correlation')
400
+
401
+ except Exception as e:
402
+ logger.warning('Failed to configure structlog for trace correlation', error=str(e))
403
+
404
+ def _configure_tracing(self) -> None:
405
+ if self.disable_traces:
406
+ return
407
+
408
+ exporter_kwargs: dict[str, Any] = {}
409
+ if self.project_id:
410
+ exporter_kwargs['project_id'] = self.project_id
411
+ if self.credentials:
412
+ exporter_kwargs['credentials'] = self.credentials
413
+
414
+ base_exporter = GenkitGCPExporter(**exporter_kwargs) if exporter_kwargs else GenkitGCPExporter()
415
+
416
+ trace_exporter = GcpAdjustingTraceExporter(
417
+ exporter=base_exporter,
418
+ log_input_and_output=self.log_input_and_output,
419
+ project_id=self.project_id,
420
+ error_handler=lambda e: _handle_tracing_error(e),
421
+ )
422
+
423
+ add_custom_exporter(trace_exporter, 'gcp_telemetry_server')
424
+
425
+ def _configure_metrics(self) -> None:
426
+ if self.disable_metrics:
427
+ return
428
+
429
+ try:
430
+ resource = Resource.create({
431
+ SERVICE_NAME: 'genkit',
432
+ SERVICE_INSTANCE_ID: str(uuid.uuid4()),
433
+ })
434
+
435
+ # Suppress detector warnings during GCP resource detection
436
+ detector_logger = logging.getLogger('opentelemetry.resourcedetector.gcp_resource_detector')
437
+ original_level = detector_logger.level
438
+ detector_logger.setLevel(logging.ERROR)
439
+
440
+ try:
441
+ gcp_resource = GoogleCloudResourceDetector(raise_on_error=True).detect()
442
+ resource = resource.merge(gcp_resource)
443
+ except Exception as e:
444
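+ # For detection failure log the exception and use the default resource. NOTE(review): this logger is still raised to ERROR here (restored in `finally`), so the warning below is likely suppressed - confirm.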
445
+ detector_logger.warning(f'Google Cloud resource detection failed: {e}')
446
+ finally:
447
+ detector_logger.setLevel(original_level)
448
+
449
+ exporter_kwargs: dict[str, Any] = {}
450
+ if self.project_id:
451
+ exporter_kwargs['project_id'] = self.project_id
452
+ if self.credentials:
453
+ exporter_kwargs['credentials'] = self.credentials
454
+
455
+ metrics_exporter = GenkitMetricExporter(
456
+ exporter=CloudMonitoringMetricsExporter(**exporter_kwargs),
457
+ error_handler=lambda e: _handle_metric_error(e),
458
+ )
459
+
460
+ reader = PeriodicExportingMetricReader(
461
+ metrics_exporter,
462
+ export_interval_millis=self.metric_export_interval_ms,
463
+ export_timeout_millis=self.metric_export_timeout_ms,
464
+ )
465
+
466
+ provider = MeterProvider(metric_readers=[reader], resource=resource)
467
+ metrics.set_meter_provider(provider)
468
+
469
+ except Exception as e:
470
+ _handle_metric_error(e)
471
+
472
+ def _inject_trace_context(
473
+ self, logger: logging.Logger, method_name: str, event_dict: dict[str, Any]
474
+ ) -> dict[str, Any]:
475
+ """Structlog processor to inject GCP-compatible trace context."""
476
+ span = trace.get_current_span()
477
+ if span == trace.INVALID_SPAN:
478
+ return event_dict
479
+
480
+ ctx = span.get_span_context()
481
+ if not ctx.is_valid:
482
+ return event_dict
483
+
484
+ if self.project_id:
485
+ event_dict['logging.googleapis.com/trace'] = f'projects/{self.project_id}/traces/{ctx.trace_id:032x}'
486
+
487
+ event_dict['logging.googleapis.com/spanId'] = f'{ctx.span_id:016x}'
488
+ event_dict['logging.googleapis.com/trace_sampled'] = '1' if ctx.trace_flags.sampled else '0'
489
+
490
+ return event_dict
491
+
43
492
 
44
493
  class GenkitGCPExporter(CloudTraceSpanExporter):
45
494
  """Exports spans to a GCP telemetry server.
@@ -82,37 +531,439 @@ class GenkitGCPExporter(CloudTraceSpanExporter):
82
531
  deadline=120.0,
83
532
  ),
84
533
  )
85
- # pylint: disable=broad-except
86
534
  except Exception as ex:
87
535
  logger.error('Error while writing to Cloud Trace', exc_info=ex)
88
536
  return SpanExportResult.FAILURE
89
537
 
90
538
  return SpanExportResult.SUCCESS
91
539
 
92
- def add_tracer_attributes(self, spans: Sequence[ReadableSpan]) -> Sequence[ReadableSpan]:
93
- """Adds the instrumentation library attribute.
540
+
541
+ class GenkitMetricExporter(MetricExporter):
542
+ """Metric exporter wrapper that adjusts start times for GCP compatibility.
543
+
544
+ Cloud Monitoring does not support delta metrics for custom metrics and will
545
+ convert any DELTA aggregations to CUMULATIVE ones on export. There is implicit
546
+ overlap in the start/end times that the Metric reader sends -- the end_time
547
+ of the previous export becomes the start_time of the current export.
548
+
549
+ This wrapper adds a microsecond to start times to ensure discrete export
550
+ timeframes and prevent data being overwritten.
551
+
552
+ This matches the JavaScript MetricExporterWrapper in gcpOpenTelemetry.ts.
553
+
554
+ Args:
555
+ exporter: The underlying CloudMonitoringMetricsExporter.
556
+ error_handler: Optional callback for export errors.
557
+ """
558
+
559
+ def __init__(
560
+ self,
561
+ exporter: CloudMonitoringMetricsExporter,
562
+ error_handler: Callable[[Exception], None] | None = None,
563
+ ) -> None:
564
+ """Initialize the metric exporter wrapper.
565
+
566
+ Args:
567
+ exporter: The underlying CloudMonitoringMetricsExporter.
568
+ error_handler: Optional callback for export errors.
569
+ """
570
+ self._exporter = exporter
571
+ self._error_handler = error_handler
572
+
573
+ # Force DELTA temporality for all instrument types to match JS implementation.
574
+ delta = AggregationTemporality.DELTA
575
+ self._preferred_temporality = {
576
+ Counter: delta,
577
+ UpDownCounter: delta,
578
+ Histogram: delta,
579
+ ObservableCounter: delta,
580
+ ObservableUpDownCounter: delta,
581
+ ObservableGauge: delta,
582
+ }
583
+
584
+ self._preferred_aggregation = getattr(exporter, '_preferred_aggregation', None)
585
+
586
+ def export(
587
+ self,
588
+ metrics_data: MetricsData,
589
+ timeout_millis: float = 10_000,
590
+ **kwargs: object,
591
+ ) -> MetricExportResult:
592
+ """Export metrics after adjusting start times.
593
+
594
+ Modifies start times of each data point to ensure no overlap with
595
+ previous exports when GCP converts DELTA to CUMULATIVE.
94
596
 
95
597
  Args:
96
- spans: Sequence of spans to modify.
598
+ metrics_data: The metrics data to export.
599
+ timeout_millis: Export timeout in milliseconds.
600
+ **kwargs: Additional arguments for base class compatibility.
97
601
 
98
602
  Returns:
99
- Sequence of spans modified.
603
+ The export result from the wrapped exporter.
100
604
  """
101
- modified_spans: list[ReadableSpan] = []
102
-
103
- for span in spans:
104
- modified_spans.append(
105
- span.attributes.update({
106
- 'instrumentationLibrary': {
107
- 'name': 'genkit-tracer',
108
- 'version': 'v1',
109
- },
110
- })
111
- )
605
+ # Modify start times before export
606
+ self._modify_start_times(metrics_data)
607
+
608
+ try:
609
+ return self._exporter.export(metrics_data, timeout_millis, **kwargs)
610
+ except Exception as e:
611
+ if self._error_handler:
612
+ self._error_handler(e)
613
+ raise
614
+
615
+ def _modify_start_times(self, metrics_data: MetricsData) -> None:
616
+ """Add 1ms to start times to prevent overlap.
617
+
618
+ Args:
619
+ metrics_data: The metrics data to modify in-place.
620
+ """
621
+ for resource_metrics in metrics_data.resource_metrics:
622
+ for scope_metrics in resource_metrics.scope_metrics:
623
+ for metric in scope_metrics.metrics:
624
+ for data_point in metric.data.data_points:
625
+ # Add 1 millisecond (1_000_000 nanoseconds) to start time
626
+ if hasattr(data_point, 'start_time_unix_nano'):
627
+ # Modifying frozen dataclass via workaround
628
+ object.__setattr__(
629
+ data_point,
630
+ 'start_time_unix_nano',
631
+ data_point.start_time_unix_nano + 1_000_000,
632
+ )
633
+
634
+ def force_flush(self, timeout_millis: float = 10_000) -> bool:
635
+ """Delegate force flush to wrapped exporter.
636
+
637
+ Args:
638
+ timeout_millis: Timeout in milliseconds.
639
+
640
+ Returns:
641
+ True if flush succeeded.
642
+ """
643
+ if hasattr(self._exporter, 'force_flush'):
644
+ return self._exporter.force_flush(timeout_millis)
645
+ return True
646
+
647
+ def shutdown(self, timeout_millis: float = 30_000, **kwargs: object) -> None:
648
+ """Delegate shutdown to wrapped exporter.
649
+
650
+ Args:
651
+ timeout_millis: Timeout in milliseconds.
652
+ **kwargs: Additional arguments for base class compatibility.
653
+ """
654
+ self._exporter.shutdown(timeout_millis, **kwargs)
655
+
656
+
657
+ class TimeAdjustedSpan(RedactedSpan):
658
+ """Wraps a span to ensure non-zero duration for GCP.
659
+
660
+ GCP Trace requires end_time > start_time.
661
+ """
662
+
663
+ @property
664
+ def end_time(self) -> int | None:
665
+ """Return the span end time, adjusted to be > start_time."""
666
+ start = self._span.start_time
667
+ end = self._span.end_time
668
+
669
+ # GCP requires end_time > start_time.
670
+ # If the span is unfinished (end_time is None) or has zero duration,
671
+ # we provide a minimum 1 microsecond duration.
672
+ if start is not None:
673
+ if end is None or end <= start:
674
+ return start + 1000
675
+
676
+ return end
677
+
678
+
679
+ class GcpAdjustingTraceExporter(AdjustingTraceExporter):
680
+ """GCP-specific span exporter that adds telemetry recording.
681
+
682
+ This extends the base AdjustingTraceExporter to add GCP-specific telemetry
683
+ recording (metrics and logs) for each span, matching the JavaScript
684
+ implementation in gcpOpenTelemetry.ts.
685
+
686
+ The telemetry handlers record:
687
+ - Feature metrics (requests, latency) for root spans
688
+ - Path metrics for failure tracking
689
+ - Generate metrics (tokens, latency) for model actions
690
+ - Action logs for tools and generate
691
+ - Engagement metrics for user feedback
692
+
693
+ Example:
694
+ ```python
695
+ exporter = GcpAdjustingTraceExporter(
696
+ exporter=GenkitGCPExporter(),
697
+ log_input_and_output=False,
698
+ project_id='my-project',
699
+ )
700
+ ```
701
+ """
702
+
703
+ def __init__(
704
+ self,
705
+ exporter: SpanExporter,
706
+ log_input_and_output: bool = False,
707
+ project_id: str | None = None,
708
+ error_handler: Callable[[Exception], None] | None = None,
709
+ ) -> None:
710
+ """Initialize the GCP adjusting trace exporter.
711
+
712
+ Args:
713
+ exporter: The underlying SpanExporter to wrap.
714
+ log_input_and_output: If True, preserve input/output in spans and logs.
715
+ Defaults to False (redact for privacy).
716
+ project_id: Optional GCP project ID for log correlation.
717
+ error_handler: Optional callback invoked when export errors occur.
718
+ """
719
+ super().__init__(
720
+ exporter=exporter,
721
+ log_input_and_output=log_input_and_output,
722
+ project_id=project_id,
723
+ error_handler=error_handler,
724
+ )
725
+ self._log_input_and_output = log_input_and_output
726
+ self._project_id = project_id
727
+
728
+ def _adjust(self, span: ReadableSpan) -> ReadableSpan:
729
+ """Apply all adjustments to a span including telemetry.
730
+
731
+ This overrides the base method to add telemetry recording before
732
+ the standard adjustments (redaction, marking, normalization).
733
+
734
+ Args:
735
+ span: The span to adjust.
736
+
737
+ Returns:
738
+ The adjusted span.
739
+ """
740
+ # Record telemetry before adjustments (uses original attributes)
741
+ span = self._tick_telemetry(span)
112
742
 
113
- return modified_spans
743
+ # Apply standard adjustments from base class
744
+ span = super()._adjust(span)
114
745
 
746
+ # Fix start/end times for GCP (must be end > start)
747
+ return TimeAdjustedSpan(span, dict(span.attributes) if span.attributes else {})
748
+
749
+ def _tick_telemetry(self, span: ReadableSpan) -> ReadableSpan:
750
+ """Record telemetry for a span and apply root state marking.
751
+
752
+ This matches the JavaScript tickTelemetry method in gcpOpenTelemetry.ts.
753
+ It calls the appropriate telemetry handlers based on span type.
754
+
755
+ Args:
756
+ span: The span to record telemetry for.
757
+
758
+ Returns:
759
+ The span, potentially with genkit:rootState added for root spans.
760
+ """
761
+ attrs = span.attributes or {}
762
+ if 'genkit:type' not in attrs:
763
+ return span
764
+
765
+ span_type = str(attrs.get('genkit:type', ''))
766
+ subtype = str(attrs.get('genkit:metadata:subtype', ''))
767
+ is_root = bool(attrs.get('genkit:isRoot'))
768
+
769
+ try:
770
+ # Always record path telemetry for error tracking
771
+ paths_telemetry.tick(span, self._log_input_and_output, self._project_id)
772
+
773
+ if is_root:
774
+ # Report top level feature request and latency only for root spans
775
+ features_telemetry.tick(span, self._log_input_and_output, self._project_id)
776
+
777
+ # Set root state explicitly
778
+ # (matches JS: span.attributes['genkit:rootState'] = span.attributes['genkit:state'])
779
+ state = attrs.get('genkit:state')
780
+ if state:
781
+ new_attrs = dict(attrs)
782
+ new_attrs['genkit:rootState'] = state
783
+ span = RedactedSpan(span, new_attrs)
784
+ else:
785
+ if span_type == 'action' and subtype == 'model':
786
+ # Report generate metrics for all model actions
787
+ generate_telemetry.tick(span, self._log_input_and_output, self._project_id)
788
+
789
+ if span_type == 'action' and subtype == 'tool':
790
+ # TODO(#4359): Report input and output for tool actions (matching JS comment)
791
+ pass
792
+
793
+ if span_type in ('action', 'flow', 'flowStep', 'util'):
794
+ # Report request and latency metrics for all actions
795
+ action_telemetry.tick(span, self._log_input_and_output, self._project_id)
796
+
797
+ if span_type == 'userEngagement':
798
+ # Report user acceptance and feedback metrics
799
+ engagement_telemetry.tick(span, self._log_input_and_output, self._project_id)
800
+
801
+ except Exception as e:
802
+ logger.warning('Error recording telemetry', error=str(e))
803
+
804
+ return span
805
+
806
+
807
def add_gcp_telemetry(
    project_id: str | None = None,
    credentials: dict[str, Any] | None = None,
    sampler: Sampler | None = None,
    log_input_and_output: bool = False,
    force_dev_export: bool = True,
    disable_metrics: bool = False,
    disable_traces: bool = False,
    metric_export_interval_ms: int | None = None,
    metric_export_timeout_ms: int | None = None,
    # Kept only so callers using the old keyword keep working.
    force_export: bool | None = None,
) -> None:
    """Set up export of Genkit traces and metrics to Google Cloud.

    Builds a ``GcpTelemetry`` manager from the given options and initializes
    it, wiring OpenTelemetry export to Google Cloud Trace and Cloud
    Monitoring. Unless ``log_input_and_output`` is enabled, model inputs and
    outputs are redacted for privacy.

    The option set mirrors the JavaScript ``GcpTelemetryConfigOptions`` and
    the Go ``FirebaseTelemetryOptions`` / ``GoogleCloudTelemetryOptions``.

    Args:
        project_id: Google Cloud project ID. When given, it takes precedence
            over environment variables and credentials. Required when using
            external credentials (e.g., Workload Identity Federation).
        credentials: Service account credentials dict used to authenticate
            with Google Cloud, mainly for running outside of GCP. On GCP,
            credentials are typically inferred via Application Default
            Credentials (ADC).
        sampler: OpenTelemetry trace sampler deciding which traces are
            collected and exported. Defaults to AlwaysOnSampler. Common
            choices:
            - AlwaysOnSampler: Collect all traces
            - AlwaysOffSampler: Collect no traces
            - TraceIdRatioBasedSampler: Sample a percentage of traces
        log_input_and_output: If True, keep model input/output in traces and
            logs. Defaults to False (redacted). Enable only in trusted
            environments where PII exposure is acceptable.
            Maps to JS: !disableLoggingInputAndOutput
        force_dev_export: If True, export telemetry even in the dev
            environment. Defaults to True. Set to False for production-only
            telemetry.
            Maps to JS: forceDevExport
        disable_metrics: If True, metrics are not exported; traces and logs
            may still be. Defaults to False.
            Maps to JS/Go: disableMetrics
        disable_traces: If True, traces are not exported; metrics and logs
            may still be. Defaults to False.
            Maps to JS/Go: disableTraces
        metric_export_interval_ms: Metrics export interval in milliseconds.
            GCP requires a minimum of 5000ms. Defaults to 60000ms. (JS/Go
            default to 5000ms in dev and 300000ms in production; 60000ms is
            used here for consistent behavior.)
            Maps to JS/Go: metricExportIntervalMillis
        metric_export_timeout_ms: Metrics export timeout in milliseconds.
            Defaults to the export interval if not specified.
            Maps to JS/Go: metricExportTimeoutMillis
        force_export: Deprecated; use ``force_dev_export``. When not None, a
            deprecation warning is logged and this value replaces
            ``force_dev_export``.

    Example:
        ```python
        # Default: PII redaction enabled
        add_gcp_telemetry()

        # Enable input/output logging (disable PII redaction)
        add_gcp_telemetry(log_input_and_output=True)

        # Force export in dev environment with specific project
        add_gcp_telemetry(force_dev_export=True, project_id='my-project')

        # Disable metrics but keep traces
        add_gcp_telemetry(disable_metrics=True)

        # Custom metric export interval (minimum 5000ms)
        add_gcp_telemetry(metric_export_interval_ms=30000)

        # With custom credentials for non-GCP environments
        add_gcp_telemetry(
            project_id='my-project',
            credentials={'type': 'service_account', ...},
        )
        ```

    See Also:
        - JS: js/plugins/google-cloud/src/types.ts (GcpTelemetryConfigOptions)
        - Go: go/plugins/firebase/telemetry.go (FirebaseTelemetryOptions)
        - Go: go/plugins/googlecloud/types.go (GoogleCloudTelemetryOptions)
    """
    # An explicitly supplied legacy flag wins over force_dev_export.
    legacy_flag_supplied = force_export is not None
    if legacy_flag_supplied:
        logger.warning('force_export is deprecated, use force_dev_export instead')

    GcpTelemetry(
        project_id=project_id,
        credentials=credentials,
        sampler=sampler,
        log_input_and_output=log_input_and_output,
        force_dev_export=force_export if legacy_flag_supplied else force_dev_export,
        disable_metrics=disable_metrics,
        disable_traces=disable_traces,
        metric_export_interval_ms=metric_export_interval_ms,
        metric_export_timeout_ms=metric_export_timeout_ms,
    ).initialize()
915
+
916
+
917
# Error handling helpers (matches JS getErrorHandler pattern).
# Module-level latches, one per signal type: each starts False and is flipped
# to True by its export error handler once the permission-denied guidance for
# that signal has been logged.
_tracing_error_logged = _metrics_error_logged = False
920
+
921
+
922
def _handle_tracing_error(error: Exception) -> None:
    """Log a trace export failure, adding IAM guidance for permission errors.

    A failure counts as permission-related when its message contains
    "permission", "denied" or "403" (case-insensitive). For those, a detailed
    message naming the required IAM role is logged, but only the first time;
    the module-level ``_tracing_error_logged`` flag records that it was shown
    so repeated export attempts do not spam the log.

    Every other failure is always logged, including after the permission
    guidance has been shown. Previously the flag was checked before the error
    was classified, so one permission error silenced all later trace export
    errors of any kind.

    Args:
        error: The export error.
    """
    global _tracing_error_logged

    error_str = str(error).lower()
    is_permission_error = any(marker in error_str for marker in ('permission', 'denied', '403'))

    if not is_permission_error:
        # Unrelated failures are never suppressed by the one-shot latch.
        logger.error('Error exporting traces to GCP', error=str(error))
        return

    if _tracing_error_logged:
        # The setup instructions were already shown; do not repeat them.
        return

    _tracing_error_logged = True
    logger.error(
        'Unable to send traces to Google Cloud. '
        'Ensure the service account has the "Cloud Trace Agent" (roles/cloudtrace.agent) role. '
        f'Error: {error}'
    )
944
+
945
+
946
def _handle_metric_error(error: Exception) -> None:
    """Log a metrics export failure, adding IAM guidance for permission errors.

    A failure counts as permission-related when its message contains
    "permission", "denied" or "403" (case-insensitive). For those, a detailed
    message naming the required IAM roles is logged, but only the first time;
    the module-level ``_metrics_error_logged`` flag records that it was shown
    so repeated export attempts do not spam the log.

    Every other failure is always logged, including after the permission
    guidance has been shown. Previously the flag was checked before the error
    was classified, so one permission error silenced all later metrics export
    errors of any kind.

    Args:
        error: The export error.
    """
    global _metrics_error_logged

    error_str = str(error).lower()
    is_permission_error = any(marker in error_str for marker in ('permission', 'denied', '403'))

    if not is_permission_error:
        # Unrelated failures are never suppressed by the one-shot latch.
        logger.error('Error exporting metrics to GCP', error=str(error))
        return

    if _metrics_error_logged:
        # The setup instructions were already shown; do not repeat them.
        return

    _metrics_error_logged = True
    logger.error(
        'Unable to send metrics to Google Cloud. '
        'Ensure the service account has the "Monitoring Metric Writer" '
        '(roles/monitoring.metricWriter) or "Cloud Telemetry Metrics Writer" '
        '(roles/telemetry.metricsWriter) role. '
        f'Error: {error}'
    )