genkit-plugin-google-cloud 0.3.2__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,605 @@
+ # Copyright 2025 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ # SPDX-License-Identifier: Apache-2.0
+
+ """Generate action telemetry for GCP.
+
+ This module tracks generate action metrics (tokens, latencies) and logs,
+ matching the JavaScript implementation in telemetry/generate.ts and Go
+ implementation in googlecloud/generate.go.
+
+ When It Fires:
+     The generate telemetry handler is called for spans where:
+     - genkit:type = "action"
+     - genkit:metadata:subtype = "model"
+
+ Metrics Recorded:
+     ┌──────────────────────────────────────┬───────────┬──────────────────────┐
+     │ Metric Name                          │ Type      │ Description          │
+     ├──────────────────────────────────────┼───────────┼──────────────────────┤
+     │ genkit/ai/generate/requests          │ Counter   │ Model call count     │
+     │ genkit/ai/generate/latency           │ Histogram │ Response time (ms)   │
+     │ genkit/ai/generate/input/tokens      │ Counter   │ Input token count    │
+     │ genkit/ai/generate/input/characters  │ Counter   │ Input char count     │
+     │ genkit/ai/generate/input/images      │ Counter   │ Input image count    │
+     │ genkit/ai/generate/input/videos      │ Counter   │ Input video count    │
+     │ genkit/ai/generate/input/audio       │ Counter   │ Input audio count    │
+     │ genkit/ai/generate/output/tokens     │ Counter   │ Output token count   │
+     │ genkit/ai/generate/output/characters │ Counter   │ Output char count    │
+     │ genkit/ai/generate/output/images     │ Counter   │ Output image count   │
+     │ genkit/ai/generate/output/videos     │ Counter   │ Output video count   │
+     │ genkit/ai/generate/output/audio      │ Counter   │ Output audio count   │
+     │ genkit/ai/generate/thinking/tokens   │ Counter   │ Thinking token count │
+     └──────────────────────────────────────┴───────────┴──────────────────────┘
+
+ Metric Dimensions:
+     All metrics include these dimensions:
+     - modelName: The model name (e.g., "gemini-2.0-flash")
+     - featureName: The outer flow/feature name
+     - path: The qualified Genkit path
+     - status: "success" or "failure"
+     - error: Error name (only on failure)
+     - source: "py" (language identifier)
+     - sourceVersion: Genkit version
+
+ Logs Recorded:
+     1. Config logs (always): Model configuration (maxOutputTokens, stopSequences)
+     2. Input logs (when log_input_and_output=True): Per-message, per-part input
+     3. Output logs (when log_input_and_output=True): Per-part output content
+
+ Log Format:
+     - Config[path, model] - Model configuration
+     - Input[path, model] (part X of Y) - Input content with part indices
+     - Output[path, model] (part X of Y) - Output content with part indices
+
+ Media Handling:
+     - Data URLs (base64) are hashed with SHA-256 to avoid logging large content
+     - Format: "data:image/png;base64,<sha256(hash)>"
+
+ GCP Documentation:
+     Cloud Monitoring Metrics:
+         - Custom Metrics: https://cloud.google.com/monitoring/custom-metrics
+         - Quotas: https://cloud.google.com/monitoring/quotas
+         - Note: Rate limit is 1 point per 5 seconds per time series
+
+     OpenTelemetry:
+         - Python Metrics SDK: https://opentelemetry-python.readthedocs.io/en/stable/sdk/metrics.html
+
+ Cross-Language Parity:
+     - JavaScript: js/plugins/google-cloud/src/telemetry/generate.ts
+     - Go: go/plugins/googlecloud/generate.go
+ """
+
+ from __future__ import annotations
+
+ import contextlib
+ import hashlib
+ import json
+ from typing import Any
+
+ import structlog
+ from opentelemetry import metrics
+ from opentelemetry.sdk.trace import ReadableSpan
+
+ from genkit.core import GENKIT_VERSION
+
+ from .utils import (
+     create_common_log_attributes,
+     extract_error_name,
+     extract_outer_feature_name_from_path,
+     to_display_path,
+     truncate,
+     truncate_path,
+ )
+
+ logger = structlog.get_logger(__name__)
+
+ # Lazy-initialized metrics
+ _action_counter: metrics.Counter | None = None
+ _latency: metrics.Histogram | None = None
+ _input_characters: metrics.Counter | None = None
+ _input_tokens: metrics.Counter | None = None
+ _input_images: metrics.Counter | None = None
+ _input_videos: metrics.Counter | None = None
+ _input_audio: metrics.Counter | None = None
+ _output_characters: metrics.Counter | None = None
+ _output_tokens: metrics.Counter | None = None
+ _output_images: metrics.Counter | None = None
+ _output_videos: metrics.Counter | None = None
+ _output_audio: metrics.Counter | None = None
+ _thinking_tokens: metrics.Counter | None = None
+
+
+ def _get_meter() -> metrics.Meter:
+     return metrics.get_meter('genkit')
+
+
+ def _get_action_counter() -> metrics.Counter:
+     global _action_counter
+     if _action_counter is None:
+         _action_counter = _get_meter().create_counter(
+             'genkit/ai/generate/requests',
+             description='Counts calls to genkit generate actions.',
+             unit='1',
+         )
+     return _action_counter
+
+
+ def _get_latency() -> metrics.Histogram:
+     global _latency
+     if _latency is None:
+         _latency = _get_meter().create_histogram(
+             'genkit/ai/generate/latency',
+             description='Latencies when interacting with a Genkit model.',
+             unit='ms',
+         )
+     return _latency
+
+
+ def _get_input_characters() -> metrics.Counter:
+     global _input_characters
+     if _input_characters is None:
+         _input_characters = _get_meter().create_counter(
+             'genkit/ai/generate/input/characters',
+             description='Counts input characters to any Genkit model.',
+             unit='1',
+         )
+     return _input_characters
+
+
+ def _get_input_tokens() -> metrics.Counter:
+     global _input_tokens
+     if _input_tokens is None:
+         _input_tokens = _get_meter().create_counter(
+             'genkit/ai/generate/input/tokens',
+             description='Counts input tokens to a Genkit model.',
+             unit='1',
+         )
+     return _input_tokens
+
+
+ def _get_input_images() -> metrics.Counter:
+     global _input_images
+     if _input_images is None:
+         _input_images = _get_meter().create_counter(
+             'genkit/ai/generate/input/images',
+             description='Counts input images to a Genkit model.',
+             unit='1',
+         )
+     return _input_images
+
+
+ def _get_input_videos() -> metrics.Counter:
+     """Get or create the input videos counter (Go parity)."""
+     global _input_videos
+     if _input_videos is None:
+         _input_videos = _get_meter().create_counter(
+             'genkit/ai/generate/input/videos',
+             description='Counts input videos to a Genkit model.',
+             unit='1',
+         )
+     return _input_videos
+
+
+ def _get_input_audio() -> metrics.Counter:
+     """Get or create the input audio counter (Go parity)."""
+     global _input_audio
+     if _input_audio is None:
+         _input_audio = _get_meter().create_counter(
+             'genkit/ai/generate/input/audio',
+             description='Counts input audio files to a Genkit model.',
+             unit='1',
+         )
+     return _input_audio
+
+
+ def _get_output_characters() -> metrics.Counter:
+     global _output_characters
+     if _output_characters is None:
+         _output_characters = _get_meter().create_counter(
+             'genkit/ai/generate/output/characters',
+             description='Counts output characters from a Genkit model.',
+             unit='1',
+         )
+     return _output_characters
+
+
+ def _get_output_tokens() -> metrics.Counter:
+     global _output_tokens
+     if _output_tokens is None:
+         _output_tokens = _get_meter().create_counter(
+             'genkit/ai/generate/output/tokens',
+             description='Counts output tokens from a Genkit model.',
+             unit='1',
+         )
+     return _output_tokens
+
+
+ def _get_output_images() -> metrics.Counter:
+     global _output_images
+     if _output_images is None:
+         _output_images = _get_meter().create_counter(
+             'genkit/ai/generate/output/images',
+             description='Counts output images from a Genkit model.',
+             unit='1',
+         )
+     return _output_images
+
+
+ def _get_output_videos() -> metrics.Counter:
+     """Get or create the output videos counter (Go parity)."""
+     global _output_videos
+     if _output_videos is None:
+         _output_videos = _get_meter().create_counter(
+             'genkit/ai/generate/output/videos',
+             description='Counts output videos from a Genkit model.',
+             unit='1',
+         )
+     return _output_videos
+
+
+ def _get_output_audio() -> metrics.Counter:
+     """Get or create the output audio counter (Go parity)."""
+     global _output_audio
+     if _output_audio is None:
+         _output_audio = _get_meter().create_counter(
+             'genkit/ai/generate/output/audio',
+             description='Counts output audio files from a Genkit model.',
+             unit='1',
+         )
+     return _output_audio
+
+
+ def _get_thinking_tokens() -> metrics.Counter:
+     global _thinking_tokens
+     if _thinking_tokens is None:
+         _thinking_tokens = _get_meter().create_counter(
+             'genkit/ai/generate/thinking/tokens',
+             description='Counts thinking tokens from a Genkit model.',
+             unit='1',
+         )
+     return _thinking_tokens
+
+
+ class GenerateTelemetry:
+     """Telemetry handler for Genkit generate actions (model calls)."""
+
+     def tick(
+         self,
+         span: ReadableSpan,
+         log_input_and_output: bool,
+         project_id: str | None = None,
+     ) -> None:
+         """Record telemetry for a generate action span.
+
+         Args:
+             span: The span to record telemetry for.
+             log_input_and_output: Whether to log input/output.
+             project_id: Optional GCP project ID.
+         """
+         attrs = span.attributes or {}
+         model_name = truncate(str(attrs.get('genkit:name', '<unknown>')), 1024)
+         path = str(attrs.get('genkit:path', ''))
+
+         # Parse input and output from span attributes
+         input_data: dict[str, Any] | None = None
+         output_data: dict[str, Any] | None = None
+
+         input_json = attrs.get('genkit:input')
+         if input_json and isinstance(input_json, str):
+             with contextlib.suppress(json.JSONDecodeError):
+                 input_data = json.loads(input_json)
+
+         output_json = attrs.get('genkit:output')
+         if output_json and isinstance(output_json, str):
+             with contextlib.suppress(json.JSONDecodeError):
+                 output_data = json.loads(output_json)
+
+         err_name = extract_error_name(list(span.events))
+         feature_name = truncate(
+             str(attrs.get('genkit:metadata:flow:name', '')) or extract_outer_feature_name_from_path(path)
+         )
+         if not feature_name or feature_name == '<unknown>':
+             feature_name = 'generate'
+
+         session_id = str(attrs.get('genkit:sessionId', '')) or None
+         thread_name = str(attrs.get('genkit:threadName', '')) or None
+
+         if input_data:
+             self._record_generate_action_metrics(model_name, feature_name, path, output_data, err_name)
+             self._record_generate_action_config_logs(
+                 span, model_name, feature_name, path, input_data, project_id, session_id, thread_name
+             )
+
+             if log_input_and_output:
+                 self._record_generate_action_input_logs(
+                     span, model_name, feature_name, path, input_data, project_id, session_id, thread_name
+                 )
+
+         if output_data and log_input_and_output:
+             self._record_generate_action_output_logs(
+                 span, model_name, feature_name, path, output_data, project_id, session_id, thread_name
+             )
+
+     def _record_generate_action_metrics(
+         self,
+         model_name: str,
+         feature_name: str,
+         path: str,
+         response: dict[str, Any] | None,
+         err_name: str | None,
+     ) -> None:
+         """Record metrics for a generate action.
+
+         Records all generate metrics matching JS/Go parity:
+         - requests, latency
+         - input: tokens, characters, images, videos, audio
+         - output: tokens, characters, images, videos, audio
+         - thinking tokens
+         """
+         usage = response.get('usage', {}) if response else {}
+         latency_ms = response.get('latencyMs') if response else None
+
+         # Note: modelName uses 1024 char limit (matching JS/Go), other dimensions use 256
+         shared = {
+             'modelName': model_name[:1024],
+             'featureName': feature_name[:256],
+             'path': path[:256],
+             'source': 'py',
+             'sourceVersion': GENKIT_VERSION,
+             'status': 'failure' if err_name else 'success',
+         }
+
+         error_dims = {'error': err_name[:256]} if err_name else {}
+         _get_action_counter().add(1, {**shared, **error_dims})
+
+         if latency_ms is not None:
+             _get_latency().record(latency_ms, shared)
+
+         # Input metrics
+         if usage.get('inputTokens'):
+             _get_input_tokens().add(int(usage['inputTokens']), shared)
+         if usage.get('inputCharacters'):
+             _get_input_characters().add(int(usage['inputCharacters']), shared)
+         if usage.get('inputImages'):
+             _get_input_images().add(int(usage['inputImages']), shared)
+         if usage.get('inputVideos'):
+             _get_input_videos().add(int(usage['inputVideos']), shared)
+         if usage.get('inputAudio'):
+             _get_input_audio().add(int(usage['inputAudio']), shared)
+
+         # Output metrics
+         if usage.get('outputTokens'):
+             _get_output_tokens().add(int(usage['outputTokens']), shared)
+         if usage.get('outputCharacters'):
+             _get_output_characters().add(int(usage['outputCharacters']), shared)
+         if usage.get('outputImages'):
+             _get_output_images().add(int(usage['outputImages']), shared)
+         if usage.get('outputVideos'):
+             _get_output_videos().add(int(usage['outputVideos']), shared)
+         if usage.get('outputAudio'):
+             _get_output_audio().add(int(usage['outputAudio']), shared)
+
+         # Thinking tokens
+         if usage.get('thoughtsTokens'):
+             _get_thinking_tokens().add(int(usage['thoughtsTokens']), shared)
+
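+     # Worked example for the method above (numbers are hypothetical): a response
+     # whose usage is {'inputTokens': 12, 'outputTokens': 48, 'thoughtsTokens': 5}
+     # and whose span has no error event increments genkit/ai/generate/requests by
+     # 1 and adds 12, 48 and 5 to the input, output and thinking token counters,
+     # all tagged with the same `shared` dimensions and status='success'.
+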
+     def _record_generate_action_config_logs(
+         self,
+         span: ReadableSpan,
+         model: str,
+         feature_name: str,
+         qualified_path: str,
+         input_data: dict[str, Any],
+         project_id: str | None,
+         session_id: str | None,
+         thread_name: str | None,
+     ) -> None:
+         """Log generate action configuration."""
+         path = truncate_path(to_display_path(qualified_path))
+         metadata = {
+             **create_common_log_attributes(span, project_id),
+             'model': model,
+             'path': path,
+             'qualifiedPath': qualified_path,
+             'featureName': feature_name,
+             'source': 'py',
+             'sourceVersion': GENKIT_VERSION,
+         }
+         if session_id:
+             metadata['sessionId'] = session_id
+         if thread_name:
+             metadata['threadName'] = thread_name
+
+         config = input_data.get('config', {})
+         if config.get('maxOutputTokens'):
+             metadata['maxOutputTokens'] = config['maxOutputTokens']
+         if config.get('stopSequences'):
+             metadata['stopSequences'] = config['stopSequences']
+
+         logger.info(f'Config[{path}, {model}]', **metadata)
+
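+     # For instance (values illustrative), a request config of
+     #     {'maxOutputTokens': 1024, 'stopSequences': ['END']}
+     # yields one structured log entry named 'Config[<display path>, <model>]'
+     # carrying both keys; keys absent from the config are simply omitted.
+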
+     def _record_generate_action_input_logs(
+         self,
+         span: ReadableSpan,
+         model: str,
+         feature_name: str,
+         qualified_path: str,
+         input_data: dict[str, Any],
+         project_id: str | None,
+         session_id: str | None,
+         thread_name: str | None,
+     ) -> None:
+         """Log generate action input messages."""
+         path = truncate_path(to_display_path(qualified_path))
+         base_metadata = {
+             **create_common_log_attributes(span, project_id),
+             'model': model,
+             'path': path,
+             'qualifiedPath': qualified_path,
+             'featureName': feature_name,
+         }
+         if session_id:
+             base_metadata['sessionId'] = session_id
+         if thread_name:
+             base_metadata['threadName'] = thread_name
+
+         messages = input_data.get('messages', [])
+         total_messages = len(messages)
+
+         for msg_idx, msg in enumerate(messages):
+             role = msg.get('role', 'user')
+             content = msg.get('content', [])
+             total_parts = len(content)
+
+             for part_idx, part in enumerate(content):
+                 part_counts = self._to_part_counts(part_idx, total_parts, msg_idx, total_messages)
+                 metadata = {
+                     **base_metadata,
+                     'content': self._to_part_log_content(part),
+                     'role': role,
+                     'partIndex': part_idx,
+                     'totalParts': total_parts,
+                     'messageIndex': msg_idx,
+                     'totalMessages': total_messages,
+                 }
+                 logger.info(f'Input[{path}, {model}] {part_counts}', **metadata)
+
+     def _record_generate_action_output_logs(
+         self,
+         span: ReadableSpan,
+         model: str,
+         feature_name: str,
+         qualified_path: str,
+         output_data: dict[str, Any],
+         project_id: str | None,
+         session_id: str | None,
+         thread_name: str | None,
+     ) -> None:
+         """Log generate action output."""
+         path = truncate_path(to_display_path(qualified_path))
+         base_metadata = {
+             **create_common_log_attributes(span, project_id),
+             'model': model,
+             'path': path,
+             'qualifiedPath': qualified_path,
+             'featureName': feature_name,
+         }
+         if session_id:
+             base_metadata['sessionId'] = session_id
+         if thread_name:
+             base_metadata['threadName'] = thread_name
+
+         message = output_data.get('message') or (output_data.get('candidates', [{}])[0].get('message'))
+         if not message or not message.get('content'):
+             return
+
+         content = message.get('content', [])
+         total_parts = len(content)
+         finish_reason = output_data.get('finishReason')
+         finish_message = output_data.get('finishMessage')
+
+         for part_idx, part in enumerate(content):
+             part_counts = self._to_part_counts(part_idx, total_parts, 0, 1)
+             metadata = {
+                 **base_metadata,
+                 'content': self._to_part_log_content(part),
+                 'role': message.get('role', 'model'),
+                 'partIndex': part_idx,
+                 'totalParts': total_parts,
+                 'candidateIndex': 0,
+                 'totalCandidates': 1,
+                 'messageIndex': 0,
+                 'finishReason': finish_reason,
+             }
+             if finish_message:
+                 metadata['finishMessage'] = truncate(finish_message)
+
+             logger.info(f'Output[{path}, {model}] {part_counts}', **metadata)
+
+     def _to_part_counts(
+         self,
+         part_ordinal: int,
+         parts: int,
+         msg_ordinal: int,
+         messages: int,
+     ) -> str:
+         """Format part counts for log messages."""
+         if parts > 1 and messages > 1:
+             return f'(part {self._x_of_y(part_ordinal, parts)} in message {self._x_of_y(msg_ordinal, messages)})'
+         if parts > 1:
+             return f'(part {self._x_of_y(part_ordinal, parts)})'
+         if messages > 1:
+             return f'(message {self._x_of_y(msg_ordinal, messages)})'
+         return ''
+
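+     # Examples of the suffix built above (arguments are zero-based ordinals):
+     #     _to_part_counts(0, 3, 1, 4) -> '(part 1 of 3 in message 2 of 4)'
+     #     _to_part_counts(1, 2, 0, 1) -> '(part 2 of 2)'
+     #     _to_part_counts(0, 1, 2, 5) -> '(message 3 of 5)'
+     #     _to_part_counts(0, 1, 0, 1) -> ''
+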
+     def _x_of_y(self, x: int, y: int) -> str:
+         """Format 'X of Y' string."""
+         return f'{x + 1} of {y}'
+
+     def _to_part_log_content(self, part: dict[str, Any]) -> str:
+         """Convert a part to log-safe content."""
+         if part.get('text'):
+             return truncate(str(part['text']))
+         if part.get('data'):
+             return truncate(json.dumps(part['data']))
+         if part.get('media'):
+             return self._to_part_log_media(part)
+         if part.get('toolRequest'):
+             return self._to_part_log_tool_request(part)
+         if part.get('toolResponse'):
+             return self._to_part_log_tool_response(part)
+         if part.get('custom'):
+             return truncate(json.dumps(part['custom']))
+         return '<unknown format>'
+
+     def _to_part_log_media(self, part: dict[str, Any]) -> str:
+         """Convert media part to log-safe content."""
+         media = part.get('media', {})
+         url = media.get('url', '')
+
+         if url.startswith('data:'):
+             split_idx = url.find('base64,')
+             if split_idx < 0:
+                 return '<unknown media format>'
+             prefix = url[: split_idx + 7]
+             hashed = hashlib.sha256(url[split_idx + 7 :].encode()).hexdigest()
+             return f'{prefix}<sha256({hashed})>'
+
+         return truncate(url)
+
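+     # Example of the hashing above (payload is a made-up placeholder): a part
+     #     {'media': {'url': 'data:image/png;base64,<payload>'}}
+     # is rendered as 'data:image/png;base64,<sha256(<64-char hex digest>)>', so
+     # raw base64 payloads never reach the logs; non-data URLs are only truncated.
+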
+     def _to_part_log_tool_request(self, part: dict[str, Any]) -> str:
+         """Convert tool request part to log-safe content."""
+         req = part.get('toolRequest', {})
+         name = req.get('name', '')
+         ref = req.get('ref', '')
+         input_val = req.get('input', '')
+         if not isinstance(input_val, str):
+             input_val = json.dumps(input_val)
+         return truncate(f'Tool request: {name}, ref: {ref}, input: {input_val}')
+
+     def _to_part_log_tool_response(self, part: dict[str, Any]) -> str:
+         """Convert tool response part to log-safe content."""
+         resp = part.get('toolResponse', {})
+         name = resp.get('name', '')
+         ref = resp.get('ref', '')
+         output_val = resp.get('output', '')
+         if not isinstance(output_val, str):
+             output_val = json.dumps(output_val)
+         return truncate(f'Tool response: {name}, ref: {ref}, output: {output_val}')
+
+
+ # Singleton instance
+ generate_telemetry = GenerateTelemetry()
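+
+ # Minimal usage sketch. Assumptions: the span has already been filtered on
+ # genkit:type == 'action' and genkit:metadata:subtype == 'model' (e.g. by the
+ # plugin's span exporter), and the project ID shown is a placeholder.
+ #
+ #     generate_telemetry.tick(
+ #         span,
+ #         log_input_and_output=False,
+ #         project_id='my-gcp-project',
+ #     )
+ #
+ # Metrics then flow through whichever OpenTelemetry MeterProvider/exporter the
+ # plugin configured, and logs are emitted via structlog.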