airbyte-agent-stripe 0.5.25 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of airbyte-agent-stripe might be problematic.
- airbyte_agent_stripe/__init__.py +237 -0
- airbyte_agent_stripe/_vendored/__init__.py +1 -0
- airbyte_agent_stripe/_vendored/connector_sdk/__init__.py +82 -0
- airbyte_agent_stripe/_vendored/connector_sdk/auth_strategies.py +1123 -0
- airbyte_agent_stripe/_vendored/connector_sdk/auth_template.py +135 -0
- airbyte_agent_stripe/_vendored/connector_sdk/cloud_utils/__init__.py +5 -0
- airbyte_agent_stripe/_vendored/connector_sdk/cloud_utils/client.py +213 -0
- airbyte_agent_stripe/_vendored/connector_sdk/connector_model_loader.py +957 -0
- airbyte_agent_stripe/_vendored/connector_sdk/constants.py +78 -0
- airbyte_agent_stripe/_vendored/connector_sdk/exceptions.py +23 -0
- airbyte_agent_stripe/_vendored/connector_sdk/executor/__init__.py +31 -0
- airbyte_agent_stripe/_vendored/connector_sdk/executor/hosted_executor.py +197 -0
- airbyte_agent_stripe/_vendored/connector_sdk/executor/local_executor.py +1504 -0
- airbyte_agent_stripe/_vendored/connector_sdk/executor/models.py +190 -0
- airbyte_agent_stripe/_vendored/connector_sdk/extensions.py +655 -0
- airbyte_agent_stripe/_vendored/connector_sdk/http/__init__.py +37 -0
- airbyte_agent_stripe/_vendored/connector_sdk/http/adapters/__init__.py +9 -0
- airbyte_agent_stripe/_vendored/connector_sdk/http/adapters/httpx_adapter.py +251 -0
- airbyte_agent_stripe/_vendored/connector_sdk/http/config.py +98 -0
- airbyte_agent_stripe/_vendored/connector_sdk/http/exceptions.py +119 -0
- airbyte_agent_stripe/_vendored/connector_sdk/http/protocols.py +114 -0
- airbyte_agent_stripe/_vendored/connector_sdk/http/response.py +102 -0
- airbyte_agent_stripe/_vendored/connector_sdk/http_client.py +686 -0
- airbyte_agent_stripe/_vendored/connector_sdk/logging/__init__.py +11 -0
- airbyte_agent_stripe/_vendored/connector_sdk/logging/logger.py +264 -0
- airbyte_agent_stripe/_vendored/connector_sdk/logging/types.py +92 -0
- airbyte_agent_stripe/_vendored/connector_sdk/observability/__init__.py +11 -0
- airbyte_agent_stripe/_vendored/connector_sdk/observability/models.py +19 -0
- airbyte_agent_stripe/_vendored/connector_sdk/observability/redactor.py +81 -0
- airbyte_agent_stripe/_vendored/connector_sdk/observability/session.py +94 -0
- airbyte_agent_stripe/_vendored/connector_sdk/performance/__init__.py +6 -0
- airbyte_agent_stripe/_vendored/connector_sdk/performance/instrumentation.py +57 -0
- airbyte_agent_stripe/_vendored/connector_sdk/performance/metrics.py +93 -0
- airbyte_agent_stripe/_vendored/connector_sdk/schema/__init__.py +75 -0
- airbyte_agent_stripe/_vendored/connector_sdk/schema/base.py +161 -0
- airbyte_agent_stripe/_vendored/connector_sdk/schema/components.py +238 -0
- airbyte_agent_stripe/_vendored/connector_sdk/schema/connector.py +131 -0
- airbyte_agent_stripe/_vendored/connector_sdk/schema/extensions.py +109 -0
- airbyte_agent_stripe/_vendored/connector_sdk/schema/operations.py +146 -0
- airbyte_agent_stripe/_vendored/connector_sdk/schema/security.py +213 -0
- airbyte_agent_stripe/_vendored/connector_sdk/secrets.py +182 -0
- airbyte_agent_stripe/_vendored/connector_sdk/telemetry/__init__.py +10 -0
- airbyte_agent_stripe/_vendored/connector_sdk/telemetry/config.py +32 -0
- airbyte_agent_stripe/_vendored/connector_sdk/telemetry/events.py +58 -0
- airbyte_agent_stripe/_vendored/connector_sdk/telemetry/tracker.py +151 -0
- airbyte_agent_stripe/_vendored/connector_sdk/types.py +241 -0
- airbyte_agent_stripe/_vendored/connector_sdk/utils.py +60 -0
- airbyte_agent_stripe/_vendored/connector_sdk/validation.py +822 -0
- airbyte_agent_stripe/connector.py +1579 -0
- airbyte_agent_stripe/connector_model.py +14869 -0
- airbyte_agent_stripe/models.py +2353 -0
- airbyte_agent_stripe/types.py +295 -0
- airbyte_agent_stripe-0.5.25.dist-info/METADATA +110 -0
- airbyte_agent_stripe-0.5.25.dist-info/RECORD +55 -0
- airbyte_agent_stripe-0.5.25.dist-info/WHEEL +4 -0
airbyte_agent_stripe/_vendored/connector_sdk/logging/logger.py
@@ -0,0 +1,264 @@
"""Request/response logging implementation."""

import base64
import json
import time
import uuid
from pathlib import Path
from typing import Any, Dict, Optional, Set

from .types import LogSession, RequestLog

# Headers to redact for security
SENSITIVE_HEADERS: Set[str] = {
    "authorization",
    "bearer",
    "api-key",
    "x-api-key",
    "token",
    "secret",
    "password",
    "credential",
}


class RequestLogger:
    """Captures HTTP request/response interactions to a JSON file.

    Implements bounded logging with automatic rotation and flush-before-discard
    to prevent unbounded memory growth in long-running processes.
    """

    def __init__(
        self,
        log_file: Optional[str] = None,
        connector_name: Optional[str] = None,
        max_logs: Optional[int] = 10000,
    ):
        """
        Initialize the request logger.

        Args:
            log_file: Path to write logs. If None, generates timestamped filename.
            connector_name: Name of the connector being logged.
            max_logs: Maximum number of logs to keep in memory before rotation.
                Set to None for unlimited (not recommended for production).
                Defaults to 10000.
        """
        if log_file is None:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            log_file = f".logs/session_{timestamp}.json"

        self.log_file = Path(log_file)
        self.log_file.parent.mkdir(parents=True, exist_ok=True)

        self.session = LogSession(
            session_id=str(uuid.uuid4()),
            connector_name=connector_name,
            max_logs=max_logs,
        )
        self._active_requests: Dict[str, Dict[str, Any]] = {}
        # Store rotated logs that have been flushed from active buffer
        self._rotated_logs: list[RequestLog] = []

    def _redact_headers(self, headers: Dict[str, str]) -> Dict[str, str]:
        """Redact sensitive headers."""
        redacted = {}
        for key, value in headers.items():
            if any(sensitive in key.lower() for sensitive in SENSITIVE_HEADERS):
                redacted[key] = "[REDACTED]"
            else:
                redacted[key] = value
        return redacted

    def _rotate_logs_if_needed(self) -> None:
        """Rotate logs if max_logs limit is reached.

        Moves oldest logs to _rotated_logs before removing them from active buffer.
        This ensures logs are preserved for final save() without memory growth.
        """
        max_logs = self.session.max_logs
        if max_logs is None:
            # Unlimited logging, no rotation needed
            return

        current_count = len(self.session.logs)
        if current_count >= max_logs:
            # Calculate how many logs to rotate (keep buffer at ~90% to avoid thrashing)
            num_to_rotate = max(1, current_count - int(max_logs * 0.9))

            # Move oldest logs to rotated buffer
            rotated = self.session.logs[:num_to_rotate]
            self._rotated_logs.extend(rotated)

            # Remove rotated logs from active buffer
            self.session.logs = self.session.logs[num_to_rotate:]

    def log_request(
        self,
        method: str,
        url: str,
        path: str,
        headers: Optional[Dict[str, str]] = None,
        params: Optional[Dict[str, Any]] = None,
        body: Optional[Any] = None,
    ) -> str:
        """
        Log the start of an HTTP request.

        Args:
            method: HTTP method (GET, POST, etc.)
            url: Full URL
            path: Request path
            headers: Request headers
            params: Query parameters
            body: Request body

        Returns:
            Request ID for correlating with response
        """
        request_id = str(uuid.uuid4())
        self._active_requests[request_id] = {
            "start_time": time.time(),
            "method": method,
            "url": url,
            "path": path,
            "headers": self._redact_headers(headers or {}),
            "params": params,
            "body": body,
        }
        return request_id

    def log_response(
        self,
        request_id: str,
        status_code: int,
        response_body: Optional[Any] = None,
    ) -> None:
        """
        Log a successful HTTP response.

        Args:
            request_id: ID returned from log_request
            status_code: HTTP status code
            response_body: Response body
        """
        if request_id not in self._active_requests:
            return

        request_data = self._active_requests.pop(request_id)
        timing_ms = (time.time() - request_data["start_time"]) * 1000

        # Convert bytes to base64 for JSON serialization
        serializable_body = response_body
        if isinstance(response_body, bytes):
            serializable_body = {
                "_binary": True,
                "_base64": base64.b64encode(response_body).decode("utf-8"),
            }

        log_entry = RequestLog(
            method=request_data["method"],
            url=request_data["url"],
            path=request_data["path"],
            headers=request_data["headers"],
            params=request_data["params"],
            body=request_data["body"],
            response_status=status_code,
            response_body=serializable_body,
            timing_ms=timing_ms,
        )

        self.session.logs.append(log_entry)
        self._rotate_logs_if_needed()

    def log_error(
        self,
        request_id: str,
        error: str,
        status_code: Optional[int] = None,
    ) -> None:
        """
        Log an HTTP request error.

        Args:
            request_id: ID returned from log_request
            error: Error message
            status_code: HTTP status code if available
        """
        if request_id not in self._active_requests:
            return

        request_data = self._active_requests.pop(request_id)
        timing_ms = (time.time() - request_data["start_time"]) * 1000

        log_entry = RequestLog(
            method=request_data["method"],
            url=request_data["url"],
            path=request_data["path"],
            headers=request_data["headers"],
            params=request_data["params"],
            body=request_data["body"],
            response_status=status_code,
            timing_ms=timing_ms,
            error=error,
        )

        self.session.logs.append(log_entry)
        self._rotate_logs_if_needed()

    def log_chunk_fetch(self, chunk: bytes) -> None:
        """Log a chunk from streaming response.

        Args:
            chunk: Binary chunk data from streaming response
        """
        self.session.chunk_logs.append(chunk)

    def save(self) -> None:
        """Write the current session to the log file.

        Includes both rotated logs and current active logs to ensure
        no data loss during bounded logging.
        """
        # Combine rotated logs with current logs for complete session
        all_logs = self._rotated_logs + self.session.logs

        # Create a temporary session with all logs for serialization
        session_data = self.session.model_dump(mode="json")
        session_data["logs"] = [log.model_dump(mode="json") for log in all_logs]

        with open(self.log_file, "w") as f:
            json.dump(session_data, f, indent=2, default=str)

    def close(self) -> None:
        """Finalize and save the logging session."""
        self.save()


class NullLogger:
    """No-op logger for when logging is disabled."""

    def log_request(self, *args, **kwargs) -> str:
        """No-op log_request."""
        return ""

    def log_response(self, *args, **kwargs) -> None:
        """No-op log_response."""
        pass

    def log_error(self, *args, **kwargs) -> None:
        """No-op log_error."""
        pass

    def log_chunk_fetch(self, chunk: bytes) -> None:
        """No-op chunk logging for production."""
        pass

    def save(self) -> None:
        """No-op save."""
        pass

    def close(self) -> None:
        """No-op close."""
        pass
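For orientation, a minimal usage sketch of the RequestLogger defined above; the URL, header value, and log file name below are illustrative placeholders rather than values taken from the package.

# Illustrative sketch only (not part of the wheel): exercising RequestLogger as defined above.
from airbyte_agent_stripe._vendored.connector_sdk.logging.logger import RequestLogger

req_logger = RequestLogger(log_file=".logs/demo.json", connector_name="stripe", max_logs=100)

# log_request returns an ID used to correlate the eventual response or error.
request_id = req_logger.log_request(
    method="GET",
    url="https://api.stripe.com/v1/customers",  # example URL
    path="/v1/customers",
    headers={"Authorization": "Bearer sk_test_123"},  # stored as "[REDACTED]"
    params={"limit": 10},
)
req_logger.log_response(request_id, status_code=200, response_body={"data": []})
req_logger.close()  # writes rotated + active logs to .logs/demo.json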
airbyte_agent_stripe/_vendored/connector_sdk/logging/types.py
@@ -0,0 +1,92 @@
"""Type definitions for request/response logging."""

import base64
from datetime import UTC, datetime
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, ConfigDict, Field, field_serializer, field_validator


def _utc_now() -> datetime:
    """Get current UTC datetime (timezone-aware)."""
    return datetime.now(UTC)


def _encode_bytes(v: bytes) -> dict:
    """Encode bytes as base64 for JSON serialization."""
    return {"_binary": True, "_base64": base64.b64encode(v).decode("utf-8")}


class RequestLog(BaseModel):
    """Captures a single HTTP request/response interaction."""

    model_config = ConfigDict()

    timestamp: datetime = Field(default_factory=_utc_now)
    method: str
    url: str
    path: str
    headers: Dict[str, str] = Field(default_factory=dict)
    params: Optional[Dict[str, Any]] = None
    body: Optional[Any] = None
    response_status: Optional[int] = None
    response_body: Optional[Any] = None
    timing_ms: Optional[float] = None
    error: Optional[str] = None

    @field_serializer("timestamp")
    def serialize_datetime(self, value: datetime) -> str:
        return value.isoformat()


class LogSession(BaseModel):
    """Collection of request logs with session metadata.

    When max_logs is set, the session will maintain a bounded buffer of recent logs.
    Older logs should be flushed to disk before being discarded (handled by RequestLogger).
    """

    model_config = ConfigDict()

    session_id: str
    started_at: datetime = Field(default_factory=_utc_now)
    connector_name: Optional[str] = None
    logs: List[RequestLog] = Field(default_factory=list)
    max_logs: Optional[int] = Field(
        default=10000,
        description="Maximum number of logs to keep in memory. "
        "When limit is reached, oldest logs should be flushed before removal. "
        "Set to None for unlimited (not recommended for production).",
    )
    chunk_logs: List[bytes] = Field(
        default_factory=list,
        description="Captured chunks from streaming responses. "
        "Each chunk is logged when log_chunk_fetch() is called.",
    )

    @field_validator("chunk_logs", mode="before")
    @classmethod
    def decode_chunk_logs(cls, v: Any) -> List[bytes]:
        """Decode chunk_logs from JSON representation back to bytes."""
        if v is None or v == []:
            return []
        if isinstance(v, list):
            result = []
            for item in v:
                if isinstance(item, bytes):
                    result.append(item)
                elif isinstance(item, dict) and item.get("_binary"):
                    # Decode from {"_binary": True, "_base64": "..."} format
                    result.append(base64.b64decode(item["_base64"]))
                else:
                    result.append(item)
            return result
        return v

    @field_serializer("started_at")
    def serialize_datetime(self, value: datetime) -> str:
        return value.isoformat()

    @field_serializer("chunk_logs")
    def serialize_chunk_logs(self, value: List[bytes]) -> List[dict]:
        """Serialize bytes chunks as base64 for JSON."""
        return [_encode_bytes(chunk) for chunk in value]
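A short sketch of how these models round-trip binary chunks through JSON, using made-up values; the serializers and the "before" validator above handle the base64 encoding and decoding.

# Illustrative sketch only: base64 round-trip of chunk_logs via the models above.
from airbyte_agent_stripe._vendored.connector_sdk.logging.types import LogSession, RequestLog

session = LogSession(session_id="demo", connector_name="stripe")
session.logs.append(RequestLog(method="GET", url="https://example.com/x", path="/x"))
session.chunk_logs.append(b"\x00\x01binary-chunk")

payload = session.model_dump(mode="json")
# chunk_logs is emitted as [{"_binary": True, "_base64": "..."}]
restored = LogSession.model_validate(payload)
assert restored.chunk_logs == [b"\x00\x01binary-chunk"]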
airbyte_agent_stripe/_vendored/connector_sdk/observability/__init__.py
@@ -0,0 +1,11 @@
"""Shared observability components for logging and telemetry."""

from .models import OperationMetadata
from .redactor import DataRedactor
from .session import ObservabilitySession

__all__ = [
    "DataRedactor",
    "ObservabilitySession",
    "OperationMetadata",
]
airbyte_agent_stripe/_vendored/connector_sdk/observability/models.py
@@ -0,0 +1,19 @@
"""Shared operation metadata models."""

from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, Optional


@dataclass
class OperationMetadata:
    """Shared operation metadata."""

    entity: str
    action: str
    timestamp: datetime
    timing_ms: Optional[float] = None
    status_code: Optional[int] = None
    error_type: Optional[str] = None
    error_message: Optional[str] = None
    params: Optional[Dict[str, Any]] = None
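For illustration, a record as the dataclass above would hold it; the entity and action names are hypothetical examples, not values from the package.

# Illustrative sketch only: constructing an OperationMetadata record.
from datetime import UTC, datetime
from airbyte_agent_stripe._vendored.connector_sdk.observability.models import OperationMetadata

meta = OperationMetadata(
    entity="customers",  # hypothetical entity name
    action="list",
    timestamp=datetime.now(UTC),
    timing_ms=42.0,
    status_code=200,
)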
airbyte_agent_stripe/_vendored/connector_sdk/observability/redactor.py
@@ -0,0 +1,81 @@
"""Shared redaction logic for both logging and telemetry."""

from typing import Any, Dict
from urllib.parse import parse_qs, urlencode, urlparse, urlunparse


class DataRedactor:
    """Shared redaction logic for both logging and telemetry."""

    SENSITIVE_HEADER_PATTERNS = [
        "authorization",
        "api-key",
        "x-api-key",
        "token",
        "bearer",
        "secret",
        "password",
        "credential",
    ]

    SENSITIVE_PARAM_PATTERNS = [
        "password",
        "secret",
        "api_key",
        "apikey",
        "token",
        "credentials",
        "auth",
        "key",
    ]

    @staticmethod
    def redact_headers(headers: Dict[str, str]) -> Dict[str, str]:
        """Redact sensitive headers."""
        redacted = {}
        for key, value in headers.items():
            if any(pattern in key.lower() for pattern in DataRedactor.SENSITIVE_HEADER_PATTERNS):
                redacted[key] = "***REDACTED***"
            else:
                redacted[key] = value
        return redacted

    @staticmethod
    def redact_params(params: Dict[str, Any]) -> Dict[str, Any]:
        """Redact sensitive parameters."""
        redacted = {}
        for key, value in params.items():
            if any(pattern in key.lower() for pattern in DataRedactor.SENSITIVE_PARAM_PATTERNS):
                redacted[key] = "***REDACTED***"
            else:
                redacted[key] = value
        return redacted

    @staticmethod
    def redact_url(url: str) -> str:
        """Redact sensitive query params from URL."""
        parsed = urlparse(url)
        if not parsed.query:
            return url

        params = parse_qs(parsed.query)
        redacted_params = {}

        for key, values in params.items():
            if any(pattern in key.lower() for pattern in DataRedactor.SENSITIVE_PARAM_PATTERNS):
                redacted_params[key] = ["***REDACTED***"] * len(values)
            else:
                redacted_params[key] = values

        # Reconstruct URL with redacted params
        new_query = urlencode(redacted_params, doseq=True)
        return urlunparse(
            (
                parsed.scheme,
                parsed.netloc,
                parsed.path,
                parsed.params,
                new_query,
                parsed.fragment,
            )
        )
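A quick sketch of what the redaction helpers above produce, with example values:

# Illustrative sketch only: the static redaction helpers defined above.
from airbyte_agent_stripe._vendored.connector_sdk.observability.redactor import DataRedactor

print(DataRedactor.redact_headers({"Authorization": "Bearer abc", "Accept": "application/json"}))
# {'Authorization': '***REDACTED***', 'Accept': 'application/json'}

print(DataRedactor.redact_params({"api_key": "abc", "page": 2}))
# {'api_key': '***REDACTED***', 'page': 2}

# redact_url rebuilds the query string with sensitive values replaced, e.g.
# "https://example.com/items?api_key=abc&page=2" keeps "page" but masks "api_key".
print(DataRedactor.redact_url("https://example.com/items?api_key=abc&page=2"))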
airbyte_agent_stripe/_vendored/connector_sdk/observability/session.py
@@ -0,0 +1,94 @@
"""Shared session context for both logging and telemetry."""

import logging
import uuid
from datetime import UTC, datetime
from pathlib import Path
from typing import Any, Dict, Optional

logger = logging.getLogger(__name__)


def get_persistent_user_id() -> str:
    """
    Get or create an anonymous user ID stored in the home directory.

    The ID is stored in ~/.airbyte/ai_sdk_user_id and persists across all sessions.
    If the file doesn't exist, a new UUID is generated and saved.

    Returns:
        An anonymous UUID string that uniquely identifies this user across sessions.
    """
    try:
        # Create .airbyte directory in home folder if it doesn't exist
        airbyte_dir = Path.home() / ".airbyte"
        airbyte_dir.mkdir(exist_ok=True)

        # Path to user ID file
        user_id_file = airbyte_dir / "ai_sdk_user_id"

        # Try to read existing user ID
        if user_id_file.exists():
            user_id = user_id_file.read_text().strip()
            if user_id:  # Validate it's not empty
                return user_id

        # Generate new user ID if file doesn't exist or is empty
        user_id = str(uuid.uuid4())
        user_id_file.write_text(user_id)
        logger.debug(f"Generated new anonymous user ID: {user_id}")

        return user_id
    except Exception as e:
        # If we can't read/write the file, generate a session-only ID
        logger.debug(f"Could not access anonymous user ID file: {e}")
        return str(uuid.uuid4())


def get_public_ip() -> Optional[str]:
    """
    Fetch the public IP address of the user.

    Returns None if unable to fetch (network issues, etc).
    Uses httpx for a robust HTTP request to a public IP service.
    """
    try:
        import httpx

        # Use a short timeout to avoid blocking
        with httpx.Client(timeout=2.0) as client:
            response = client.get("https://api.ipify.org?format=text")
            response.raise_for_status()
            return response.text.strip()
    except Exception:
        # Never fail - just return None
        return None


class ObservabilitySession:
    """Shared session context for both logging and telemetry."""

    def __init__(
        self,
        connector_name: str,
        connector_version: Optional[str] = None,
        execution_context: str = "direct",
        session_id: Optional[str] = None,
    ):
        self.session_id = session_id or str(uuid.uuid4())
        self.user_id = get_persistent_user_id()
        self.connector_name = connector_name
        self.connector_version = connector_version
        self.execution_context = execution_context
        self.started_at = datetime.now(UTC)
        self.operation_count = 0
        self.metadata: Dict[str, Any] = {}
        self.public_ip = get_public_ip()

    def increment_operations(self):
        """Increment the operation counter."""
        self.operation_count += 1

    def duration_seconds(self) -> float:
        """Calculate session duration in seconds."""
        return (datetime.now(UTC) - self.started_at).total_seconds()
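A minimal sketch of constructing the session above; note that instantiation reads or creates ~/.airbyte/ai_sdk_user_id and attempts one outbound request to api.ipify.org, exactly as the functions above show.

# Illustrative sketch only: building an ObservabilitySession.
from airbyte_agent_stripe._vendored.connector_sdk.observability.session import ObservabilitySession

session = ObservabilitySession(connector_name="stripe", connector_version="0.5.25")
session.increment_operations()
print(session.session_id, session.user_id, session.public_ip, session.duration_seconds())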
airbyte_agent_stripe/_vendored/connector_sdk/performance/instrumentation.py
@@ -0,0 +1,57 @@
"""Performance instrumentation decorator for async functions."""

import functools
import logging
import time
from typing import Any, Callable, TypeVar

# Type variable for generic function decoration
F = TypeVar("F", bound=Callable[..., Any])

logger = logging.getLogger(__name__)


def instrument(metric_name: str) -> Callable[[F], F]:
    """Decorator to instrument async functions with performance tracking.

    Args:
        metric_name: Name of the metric to track

    Returns:
        Decorator function

    Example:
        @instrument("stripe.customer.list")
        async def list_customers():
            ...
    """

    def decorator(func: F) -> F:
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            start_time = time.time()
            success = True
            error = None

            try:
                result = await func(*args, **kwargs)
                return result

            except Exception as e:
                success = False
                error = e
                raise

            finally:
                duration = time.time() - start_time
                duration_ms = duration * 1000

                # Log performance metrics
                if success:
                    logger.debug(f"[{metric_name}] completed in {duration_ms:.2f}ms")
                else:
                    logger.warning(f"[{metric_name}] failed after {duration_ms:.2f}ms: {error}")

        return wrapper  # type: ignore

    return decorator
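A minimal async usage sketch of the decorator above; the function body is illustrative, the metric name is taken from the docstring example, and the log line only appears if DEBUG logging is enabled.

# Illustrative sketch only: wrapping an async callable with @instrument.
import asyncio

from airbyte_agent_stripe._vendored.connector_sdk.performance.instrumentation import instrument


@instrument("stripe.customer.list")
async def list_customers() -> list:
    await asyncio.sleep(0.01)  # stand-in for a real API call
    return [{"id": "cus_123"}]


asyncio.run(list_customers())
# With DEBUG logging enabled, emits something like:
#   [stripe.customer.list] completed in 10.42ms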