airbyte-internal-ops 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as published.
@@ -10,6 +10,9 @@ from airbyte.exceptions import PyAirbyteInputError
 MCP_SERVER_NAME = "airbyte-internal-ops"
 """The name of the MCP server."""
 
+USER_AGENT = "Airbyte-Internal-Ops Python client"
+"""User-Agent string for HTTP requests to Airbyte Cloud APIs."""
+
 # Environment variable names for internal admin authentication
 ENV_AIRBYTE_INTERNAL_ADMIN_FLAG = "AIRBYTE_INTERNAL_ADMIN_FLAG"
 ENV_AIRBYTE_INTERNAL_ADMIN_USER = "AIRBYTE_INTERNAL_ADMIN_USER"
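
For context on how the new constant is meant to be used: a User-Agent string like this is normally attached as a request header by the HTTP client. A minimal sketch (the endpoint URL and the use of httpx are illustrative assumptions, not taken from this diff):

    import httpx

    from airbyte_ops_mcp.constants import USER_AGENT

    # Hypothetical call site: identify this client on requests to Airbyte Cloud APIs.
    response = httpx.get(
        "https://cloud.airbyte.com/api/v1/health",  # placeholder endpoint
        headers={"User-Agent": USER_AGENT},
    )
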
@@ -0,0 +1,18 @@
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+"""GCP Cloud Logging utilities for fetching error details by error ID."""
+
+from airbyte_ops_mcp.gcp_logs.error_lookup import (
+    GCPLogEntry,
+    GCPLogPayload,
+    GCPLogSearchResult,
+    GCPSeverity,
+    fetch_error_logs,
+)
+
+__all__ = [
+    "GCPLogEntry",
+    "GCPLogPayload",
+    "GCPLogSearchResult",
+    "GCPSeverity",
+    "fetch_error_logs",
+]
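
The re-exports above flatten the package surface, so downstream code can import everything from airbyte_ops_mcp.gcp_logs in one statement. For example:

    from airbyte_ops_mcp.gcp_logs import GCPSeverity, fetch_error_logs

    # Same lookup as the module docstring below, restricted to ERROR and above.
    result = fetch_error_logs(
        error_id="3173452e-8f22-4286-a1ec-b0f16c1e078a",
        min_severity_filter=GCPSeverity.ERROR,
    )
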
@@ -0,0 +1,383 @@
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+"""Fetch full stack traces from Google Cloud Logs by error ID.
+
+This module provides functionality to look up error details from GCP Cloud Logging
+using an error ID (UUID). This is useful for debugging API errors that return
+only an error ID in the response.
+
+Example:
+    from airbyte_ops_mcp.gcp_logs import fetch_error_logs
+
+    result = fetch_error_logs(
+        error_id="3173452e-8f22-4286-a1ec-b0f16c1e078a",
+        project="prod-ab-cloud-proj",
+        lookback_days=7,
+    )
+    for payload in result.payloads:
+        print(payload.message)
+"""
+
+from __future__ import annotations
+
+import re
+from datetime import UTC, datetime, timedelta
+from enum import StrEnum
+from typing import Any
+
+from google.cloud import logging
+from google.cloud.logging_v2 import entries
+from pydantic import BaseModel, Field
+
+# Default GCP project for Airbyte Cloud
+DEFAULT_GCP_PROJECT = "prod-ab-cloud-proj"
+
+
+class GCPSeverity(StrEnum):
+    """Valid GCP Cloud Logging severity levels."""
+
+    DEBUG = "DEBUG"
+    INFO = "INFO"
+    NOTICE = "NOTICE"
+    WARNING = "WARNING"
+    ERROR = "ERROR"
+    CRITICAL = "CRITICAL"
+    ALERT = "ALERT"
+    EMERGENCY = "EMERGENCY"
+
+
+class GCPLogResourceLabels(BaseModel):
+    """Resource labels from a GCP log entry."""
+
+    pod_name: str | None = Field(default=None, description="Kubernetes pod name")
+    container_name: str | None = Field(
+        default=None, description="Container name within the pod"
+    )
+    namespace_name: str | None = Field(default=None, description="Kubernetes namespace")
+    cluster_name: str | None = Field(default=None, description="GKE cluster name")
+
+
+class GCPLogResource(BaseModel):
+    """Resource information from a GCP log entry."""
+
+    type: str | None = Field(default=None, description="Resource type")
+    labels: GCPLogResourceLabels = Field(
+        default_factory=GCPLogResourceLabels, description="Resource labels"
+    )
+
+
+class GCPLogSourceLocation(BaseModel):
+    """Source location information from a GCP log entry."""
+
+    file: str | None = Field(default=None, description="Source file path")
+    line: int | None = Field(default=None, description="Line number")
+    function: str | None = Field(default=None, description="Function name")
+
+
+class GCPLogEntry(BaseModel):
+    """A single log entry from GCP Cloud Logging."""
+
+    timestamp: datetime | None = Field(
+        default=None, description="When the log entry was created"
+    )
+    severity: str | None = Field(
+        default=None, description="Log severity (DEBUG, INFO, WARNING, ERROR, etc.)"
+    )
+    log_name: str | None = Field(default=None, description="Full log name path")
+    insert_id: str | None = Field(
+        default=None, description="Unique identifier for the log entry"
+    )
+    trace: str | None = Field(
+        default=None, description="Trace ID for distributed tracing"
+    )
+    span_id: str | None = Field(default=None, description="Span ID within the trace")
+    payload: Any = Field(default=None, description="Log entry payload (text or struct)")
+    payload_type: str | None = Field(
+        default=None, description="Type of payload (text, struct, protobuf)"
+    )
+    resource: GCPLogResource = Field(
+        default_factory=GCPLogResource, description="Resource information"
+    )
+    source_location: GCPLogSourceLocation | None = Field(
+        default=None, description="Source code location"
+    )
+    labels: dict[str, str] = Field(
+        default_factory=dict, description="User-defined labels"
+    )
+
+
+class GCPLogPayload(BaseModel):
+    """Extracted and combined payload from grouped log entries."""
+
+    timestamp: datetime | None = Field(
+        default=None, description="Timestamp of the first entry in the group"
+    )
+    severity: str | None = Field(default=None, description="Severity of the log group")
+    resource: GCPLogResource = Field(
+        default_factory=GCPLogResource, description="Resource information"
+    )
+    num_log_lines: int = Field(
+        default=0, description="Number of log lines combined into this payload"
+    )
+    message: str = Field(default="", description="Combined message from all log lines")
+
+
+class GCPLogSearchResult(BaseModel):
+    """Result of searching GCP Cloud Logging for an error ID."""
+
+    error_id: str = Field(description="The error ID that was searched for")
+    project: str = Field(description="GCP project that was searched")
+    lookback_days_searched: int = Field(
+        description="Number of lookback days that were searched"
+    )
+    total_entries_found: int = Field(
+        description="Total number of log entries found (including related entries)"
+    )
+    entries: list[GCPLogEntry] = Field(
+        default_factory=list, description="Raw log entries found"
+    )
+    payloads: list[GCPLogPayload] = Field(
+        default_factory=list,
+        description="Extracted and grouped payloads (reconstructed stack traces)",
+    )
+
+
+def _build_filter(
+    error_id: str,
+    lookback_days: int,
+    min_severity_filter: GCPSeverity | None,
+) -> str:
+    """Build the Cloud Logging filter query."""
+    filter_parts = [f'"{error_id}"']
+
+    start_time = datetime.now(UTC) - timedelta(days=lookback_days)
+    filter_parts.append(f'timestamp >= "{start_time.isoformat()}"')
+
+    if min_severity_filter:
+        filter_parts.append(f"severity>={min_severity_filter}")
+
+    return " AND ".join(filter_parts)
+
+
+def _entry_to_model(
+    entry: entries.StructEntry | entries.TextEntry | entries.ProtobufEntry,
+) -> GCPLogEntry:
+    """Convert a GCP log entry to a Pydantic model."""
+    resource_labels = {}
+    if entry.resource and entry.resource.labels:
+        resource_labels = dict(entry.resource.labels)
+
+    resource = GCPLogResource(
+        type=entry.resource.type if entry.resource else None,
+        labels=GCPLogResourceLabels(
+            pod_name=resource_labels.get("pod_name"),
+            container_name=resource_labels.get("container_name"),
+            namespace_name=resource_labels.get("namespace_name"),
+            cluster_name=resource_labels.get("cluster_name"),
+        ),
+    )
+
+    source_location = None
+    if entry.source_location:
+        source_location = GCPLogSourceLocation(
+            file=entry.source_location.get("file"),
+            line=entry.source_location.get("line"),
+            function=entry.source_location.get("function"),
+        )
+
+    payload: Any = None
+    payload_type = "unknown"
+    if isinstance(entry, entries.StructEntry):
+        payload = entry.payload
+        payload_type = "struct"
+    elif isinstance(entry, entries.TextEntry):
+        payload = entry.payload
+        payload_type = "text"
+    elif isinstance(entry, entries.ProtobufEntry):
+        payload = str(entry.payload)
+        payload_type = "protobuf"
+
+    return GCPLogEntry(
+        timestamp=entry.timestamp,
+        severity=entry.severity,
+        log_name=entry.log_name,
+        insert_id=entry.insert_id,
+        trace=entry.trace,
+        span_id=entry.span_id,
+        payload=payload,
+        payload_type=payload_type,
+        resource=resource,
+        source_location=source_location,
+        labels=dict(entry.labels) if entry.labels else {},
+    )
+
+
+def _group_entries_by_occurrence(
+    log_entries: list[GCPLogEntry],
+) -> list[list[GCPLogEntry]]:
+    """Group log entries by occurrence (timestamp clusters within 1 second)."""
+    if not log_entries:
+        return []
+
+    sorted_entries = sorted(
+        log_entries, key=lambda x: x.timestamp or datetime.min.replace(tzinfo=UTC)
+    )
+
+    groups: list[list[GCPLogEntry]] = []
+    current_group = [sorted_entries[0]]
+    current_timestamp = sorted_entries[0].timestamp or datetime.min.replace(tzinfo=UTC)
+
+    for entry in sorted_entries[1:]:
+        entry_timestamp = entry.timestamp or datetime.min.replace(tzinfo=UTC)
+        time_diff = abs((entry_timestamp - current_timestamp).total_seconds())
+
+        current_pod = current_group[0].resource.labels.pod_name
+        entry_pod = entry.resource.labels.pod_name
+
+        if time_diff <= 1 and entry_pod == current_pod:
+            current_group.append(entry)
+        else:
+            groups.append(current_group)
+            current_group = [entry]
+            current_timestamp = entry_timestamp
+
+    if current_group:
+        groups.append(current_group)
+
+    return groups
+
+
+def _extract_payloads(log_entries: list[GCPLogEntry]) -> list[GCPLogPayload]:
+    """Extract and group payloads by occurrence."""
+    if not log_entries:
+        return []
+
+    grouped = _group_entries_by_occurrence(log_entries)
+
+    results = []
+    for group in grouped:
+        payloads = []
+        for entry in group:
+            if entry.payload:
+                payload_text = str(entry.payload)
+                payload_text = re.sub(r"\x1b\[[0-9;]*m", "", payload_text)
+                payloads.append(payload_text)
+
+        combined_message = "\n".join(payloads)
+
+        first_entry = group[0]
+        result = GCPLogPayload(
+            timestamp=first_entry.timestamp,
+            severity=first_entry.severity,
+            resource=first_entry.resource,
+            num_log_lines=len(group),
+            message=combined_message,
+        )
+        results.append(result)
+
+    return results
+
+
+def fetch_error_logs(
+    error_id: str,
+    project: str = DEFAULT_GCP_PROJECT,
+    lookback_days: int = 7,
+    min_severity_filter: GCPSeverity | None = None,
+    include_log_envelope_seconds: float = 1.0,
+    max_log_entries: int | None = None,
+) -> GCPLogSearchResult:
+    """Fetch logs from Google Cloud Logging by error ID.
+
+    This function searches GCP Cloud Logging for log entries containing the
+    specified error ID, then fetches related log entries (multi-line stack traces)
+    from the same timestamp and resource.
+    """
+    client_options = {"quota_project_id": project}
+    client = logging.Client(project=project, client_options=client_options)
+
+    filter_str = _build_filter(error_id, lookback_days, min_severity_filter)
+
+    entries_iterator = client.list_entries(
+        filter_=filter_str,
+        order_by=logging.DESCENDING,
+    )
+
+    initial_matches = list(entries_iterator)
+
+    if not initial_matches:
+        return GCPLogSearchResult(
+            error_id=error_id,
+            project=project,
+            lookback_days_searched=lookback_days,
+            total_entries_found=0,
+            entries=[],
+            payloads=[],
+        )
+
+    all_results: list[GCPLogEntry] = []
+    seen_insert_ids: set[str] = set()
+
+    for match in initial_matches:
+        timestamp = match.timestamp
+        resource_type_val = match.resource.type if match.resource else None
+        resource_labels = (
+            dict(match.resource.labels)
+            if match.resource and match.resource.labels
+            else {}
+        )
+        log_name = match.log_name
+
+        start_time = timestamp - timedelta(seconds=include_log_envelope_seconds)
+        end_time = timestamp + timedelta(seconds=include_log_envelope_seconds)
+
+        related_filter_parts = [
+            f'timestamp >= "{start_time.isoformat()}"',
+            f'timestamp <= "{end_time.isoformat()}"',
+        ]
+
+        if log_name:
+            related_filter_parts.append(f'logName="{log_name}"')
+
+        if resource_type_val:
+            related_filter_parts.append(f'resource.type="{resource_type_val}"')
+
+        if "pod_name" in resource_labels:
+            related_filter_parts.append(
+                f'resource.labels.pod_name="{resource_labels["pod_name"]}"'
+            )
+        if "container_name" in resource_labels:
+            related_filter_parts.append(
+                f'resource.labels.container_name="{resource_labels["container_name"]}"'
+            )
+
+        # Note: resource_type_val is extracted from the matched entry, and
+        # min_severity_filter is already applied in the initial search filter
+
+        related_filter = " AND ".join(related_filter_parts)
+
+        related_entries = client.list_entries(
+            filter_=related_filter,
+            order_by=logging.ASCENDING,
+        )
+
+        for entry in related_entries:
+            if entry.insert_id and entry.insert_id not in seen_insert_ids:
+                seen_insert_ids.add(entry.insert_id)
+                all_results.append(_entry_to_model(entry))
+
+    all_results.sort(
+        key=lambda x: x.timestamp or datetime.min.replace(tzinfo=UTC), reverse=True
+    )
+
+    if max_log_entries:
+        all_results = all_results[:max_log_entries]
+
+    payloads = _extract_payloads(all_results)
+
+    return GCPLogSearchResult(
+        error_id=error_id,
+        project=project,
+        lookback_days_searched=lookback_days,
+        total_entries_found=len(all_results),
+        entries=all_results,
+        payloads=payloads,
+    )
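
To make the query shape concrete, this sketch prints the filter that _build_filter assembles (it calls a private helper purely for illustration; the timestamp clause depends on the current time, so the value shown is abbreviated):

    from airbyte_ops_mcp.gcp_logs import GCPSeverity
    from airbyte_ops_mcp.gcp_logs.error_lookup import _build_filter

    print(
        _build_filter(
            error_id="3173452e-8f22-4286-a1ec-b0f16c1e078a",
            lookback_days=7,
            min_severity_filter=GCPSeverity.ERROR,
        )
    )
    # "3173452e-8f22-4286-a1ec-b0f16c1e078a" AND timestamp >= "2025-...+00:00" AND severity>=ERROR
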
@@ -121,6 +121,7 @@ def get_cloud_connector_version(
 
         # Use vendored API client instead of connector.get_connector_version()
         # Use Config API root for version management operations
+        # Pass workspace_id to get detailed scoped configuration context
         version_data = api_client.get_connector_version(
            connector_id=actor_id,
            connector_type=actor_type,
@@ -128,13 +129,31 @@ def get_cloud_connector_version(
            client_id=auth.client_id,
            client_secret=auth.client_secret,
            bearer_token=auth.bearer_token,
+           workspace_id=workspace_id,
+       )
+
+       # Determine if version is pinned from scoped config context (more reliable)
+       # The API's isVersionOverrideApplied only returns true for USER-created pins,
+       # not system-generated pins (e.g., breaking_change origin). Check scopedConfigs
+       # for a more accurate picture of whether ANY pin exists.
+       scoped_configs = version_data.get("scopedConfigs", {})
+       has_any_pin = (
+           any(config is not None for config in scoped_configs.values())
+           if scoped_configs
+           else False
+       )
+
+       # Use scoped config existence as the source of truth for "is pinned"
+       # Fall back to API's isVersionOverrideApplied if no scoped config data
+       is_pinned = (
+           has_any_pin if scoped_configs else version_data["isVersionOverrideApplied"]
        )
 
        return ConnectorVersionInfo(
            connector_id=actor_id,
            connector_type=actor_type,
            version=version_data["dockerImageTag"],
-           is_version_pinned=version_data["isVersionOverrideApplied"],
+           is_version_pinned=is_pinned,
        )
    except CloudAuthError:
        raise
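
A worked example of the new pinning decision, with hypothetical API payloads (only the scopedConfigs and isVersionOverrideApplied keys come from the hunk above; the inner dict shapes are invented for illustration):

    def is_pinned(version_data: dict) -> bool:
        # Mirrors the logic added above: prefer scopedConfigs when present.
        scoped_configs = version_data.get("scopedConfigs", {})
        if scoped_configs:
            return any(config is not None for config in scoped_configs.values())
        return version_data["isVersionOverrideApplied"]

    # User-created pin: both signals agree.
    assert is_pinned(
        {"isVersionOverrideApplied": True, "scopedConfigs": {"workspace": {"origin": "user"}}}
    )
    # System-generated pin (e.g., breaking_change): the API flag alone would miss it.
    assert is_pinned(
        {"isVersionOverrideApplied": False, "scopedConfigs": {"workspace": {"origin": "breaking_change"}}}
    )
    # No scoped config data at all: fall back to the API flag.
    assert not is_pinned({"isVersionOverrideApplied": False})
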
@@ -0,0 +1,92 @@
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+"""MCP tools for GCP Cloud Logging operations.
+
+This module provides MCP tools for querying GCP Cloud Logging,
+particularly for looking up error details by error ID.
+"""
+
+from __future__ import annotations
+
+from typing import Annotated
+
+from fastmcp import FastMCP
+from pydantic import Field
+
+from airbyte_ops_mcp.gcp_logs import (
+    GCPLogSearchResult,
+    GCPSeverity,
+    fetch_error_logs,
+)
+from airbyte_ops_mcp.gcp_logs.error_lookup import DEFAULT_GCP_PROJECT
+from airbyte_ops_mcp.mcp._mcp_utils import mcp_tool, register_mcp_tools
+
+
+@mcp_tool(
+    read_only=True,
+    idempotent=True,
+)
+def lookup_cloud_backend_error(
+    error_id: Annotated[
+        str,
+        Field(
+            description=(
+                "The error ID (UUID) to search for. This is typically returned "
+                "in API error responses as {'errorId': '...'}"
+            )
+        ),
+    ],
+    project: Annotated[
+        str,
+        Field(
+            default=DEFAULT_GCP_PROJECT,
+            description=(
+                "GCP project ID to search in. Defaults to 'prod-ab-cloud-proj' "
+                "(Airbyte Cloud production)."
+            ),
+        ),
+    ],
+    lookback_days: Annotated[
+        int,
+        Field(
+            default=7,
+            description="Number of days to look back in logs. Defaults to 7.",
+        ),
+    ],
+    min_severity_filter: Annotated[
+        GCPSeverity | None,
+        Field(
+            default=None,
+            description="Optional minimum severity level to filter logs.",
+        ),
+    ],
+    max_log_entries: Annotated[
+        int,
+        Field(
+            default=200,
+            description="Maximum number of log entries to return. Defaults to 200.",
+        ),
+    ],
+) -> GCPLogSearchResult:
+    """Look up error details from GCP Cloud Logging by error ID.
+
+    When an Airbyte Cloud API returns an error response with only an error ID
+    (e.g., {"errorId": "3173452e-8f22-4286-a1ec-b0f16c1e078a"}), this tool
+    fetches the full stack trace and error details from GCP Cloud Logging.
+
+    The tool searches for log entries containing the error ID and fetches
+    related entries (multi-line stack traces) from the same timestamp and pod.
+
+    Requires GCP credentials with Logs Viewer role on the target project.
+    """
+    return fetch_error_logs(
+        error_id=error_id,
+        project=project,
+        lookback_days=lookback_days,
+        min_severity_filter=min_severity_filter,
+        max_log_entries=max_log_entries,
+    )
+
+
+def register_gcp_logs_tools(app: FastMCP) -> None:
+    """Register GCP logs tools with the FastMCP app."""
+    register_mcp_tools(app)
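
And a minimal way to exercise the new tool module end to end: stand up a FastMCP server exposing just this tool (a sketch; it assumes GCP Application Default Credentials with the Logs Viewer role, per the docstring above, and the demo server name is arbitrary):

    from fastmcp import FastMCP

    from airbyte_ops_mcp.mcp.gcp_logs import register_gcp_logs_tools

    app = FastMCP("airbyte-internal-ops-demo")
    register_gcp_logs_tools(app)

    if __name__ == "__main__":
        app.run()  # FastMCP defaults to the stdio transport
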
@@ -24,6 +24,7 @@ from airbyte_ops_mcp.constants import MCP_SERVER_NAME
 from airbyte_ops_mcp.mcp.cloud_connector_versions import (
     register_cloud_connector_version_tools,
 )
+from airbyte_ops_mcp.mcp.gcp_logs import register_gcp_logs_tools
 from airbyte_ops_mcp.mcp.github import register_github_tools
 from airbyte_ops_mcp.mcp.github_repo_ops import register_github_repo_ops_tools
 from airbyte_ops_mcp.mcp.prerelease import register_prerelease_tools
@@ -62,6 +63,7 @@ def register_server_assets(app: FastMCP) -> None:
     register_prerelease_tools(app)
     register_cloud_connector_version_tools(app)
     register_prod_db_query_tools(app)
+    register_gcp_logs_tools(app)
     register_prompts(app)
     register_regression_tests_tools(app)