airbyte-agent-klaviyo 0.1.0 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_agent_klaviyo/__init__.py +225 -0
- airbyte_agent_klaviyo/_vendored/__init__.py +1 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/__init__.py +82 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/auth_strategies.py +1171 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/auth_template.py +135 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/cloud_utils/__init__.py +5 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/cloud_utils/client.py +213 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/connector_model_loader.py +1120 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/constants.py +78 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/exceptions.py +23 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/executor/__init__.py +31 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/executor/hosted_executor.py +201 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/executor/local_executor.py +1854 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/executor/models.py +202 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/extensions.py +693 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/http/__init__.py +37 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/http/adapters/__init__.py +9 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/http/adapters/httpx_adapter.py +251 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/http/config.py +98 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/http/exceptions.py +119 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/http/protocols.py +114 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/http/response.py +104 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/http_client.py +693 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/introspection.py +481 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/logging/__init__.py +11 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/logging/logger.py +273 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/logging/types.py +93 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/observability/__init__.py +11 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/observability/config.py +179 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/observability/models.py +19 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/observability/redactor.py +81 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/observability/session.py +103 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/performance/__init__.py +6 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/performance/instrumentation.py +57 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/performance/metrics.py +93 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/schema/__init__.py +75 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/schema/base.py +201 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/schema/components.py +244 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/schema/connector.py +120 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/schema/extensions.py +301 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/schema/operations.py +156 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/schema/security.py +236 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/secrets.py +182 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/telemetry/__init__.py +10 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/telemetry/config.py +32 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/telemetry/events.py +59 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/telemetry/tracker.py +155 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/types.py +270 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/utils.py +60 -0
- airbyte_agent_klaviyo/_vendored/connector_sdk/validation.py +848 -0
- airbyte_agent_klaviyo/connector.py +1431 -0
- airbyte_agent_klaviyo/connector_model.py +2230 -0
- airbyte_agent_klaviyo/models.py +676 -0
- airbyte_agent_klaviyo/types.py +1319 -0
- airbyte_agent_klaviyo-0.1.0.dist-info/METADATA +151 -0
- airbyte_agent_klaviyo-0.1.0.dist-info/RECORD +57 -0
- airbyte_agent_klaviyo-0.1.0.dist-info/WHEEL +4 -0
airbyte_agent_klaviyo/_vendored/connector_sdk/logging/logger.py
@@ -0,0 +1,273 @@
"""Request/response logging implementation."""

import base64
import json
import time
import uuid
from pathlib import Path
from typing import Any, Dict, Set

from .types import LogSession, RequestLog

# Headers to redact for security
SENSITIVE_HEADERS: Set[str] = {
    "authorization",
    "bearer",
    "api-key",
    "x-api-key",
    "token",
    "secret",
    "password",
    "credential",
}


class RequestLogger:
    """Captures HTTP request/response interactions to a JSON file.

    Implements bounded logging with automatic rotation and flush-before-discard
    to prevent unbounded memory growth in long-running processes.
    """

    def __init__(
        self,
        log_file: str | None = None,
        connector_name: str | None = None,
        max_logs: int | None = 10000,
    ):
        """
        Initialize the request logger.

        Args:
            log_file: Path to write logs. If None, generates timestamped filename.
            connector_name: Name of the connector being logged.
            max_logs: Maximum number of logs to keep in memory before rotation.
                Set to None for unlimited (not recommended for production).
                Defaults to 10000.
        """
        if log_file is None:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            log_file = f".logs/session_{timestamp}.json"

        self.log_file = Path(log_file)
        self.log_file.parent.mkdir(parents=True, exist_ok=True)

        self.session = LogSession(
            session_id=str(uuid.uuid4()),
            connector_name=connector_name,
            max_logs=max_logs,
        )
        self._active_requests: Dict[str, Dict[str, Any]] = {}
        # Store rotated logs that have been flushed from active buffer
        self._rotated_logs: list[RequestLog] = []

    def _redact_headers(self, headers: Dict[str, str]) -> Dict[str, str]:
        """Redact sensitive headers."""
        redacted = {}
        for key, value in headers.items():
            if any(sensitive in key.lower() for sensitive in SENSITIVE_HEADERS):
                redacted[key] = "[REDACTED]"
            else:
                redacted[key] = value
        return redacted

    def _rotate_logs_if_needed(self) -> None:
        """Rotate logs if max_logs limit is reached.

        Moves oldest logs to _rotated_logs before removing them from active buffer.
        This ensures logs are preserved for final save() without memory growth.
        """
        max_logs = self.session.max_logs
        if max_logs is None:
            # Unlimited logging, no rotation needed
            return

        current_count = len(self.session.logs)
        if current_count >= max_logs:
            # Calculate how many logs to rotate (keep buffer at ~90% to avoid thrashing)
            num_to_rotate = max(1, current_count - int(max_logs * 0.9))

            # Move oldest logs to rotated buffer
            rotated = self.session.logs[:num_to_rotate]
            self._rotated_logs.extend(rotated)

            # Remove rotated logs from active buffer
            self.session.logs = self.session.logs[num_to_rotate:]

    def log_request(
        self,
        method: str,
        url: str,
        path: str,
        headers: Dict[str, str] | None = None,
        params: Dict[str, Any] | None = None,
        body: Any | None = None,
    ) -> str:
        """
        Log the start of an HTTP request.

        Args:
            method: HTTP method (GET, POST, etc.)
            url: Full URL
            path: Request path
            headers: Request headers
            params: Query parameters
            body: Request body

        Returns:
            Request ID for correlating with response
        """
        request_id = str(uuid.uuid4())
        self._active_requests[request_id] = {
            "start_time": time.time(),
            "method": method,
            "url": url,
            "path": path,
            "headers": self._redact_headers(headers or {}),
            "params": params,
            "body": body,
        }
        return request_id

    def log_response(
        self,
        request_id: str,
        status_code: int,
        response_body: Any | None = None,
        response_headers: Dict[str, str] | None = None,
    ) -> None:
        """
        Log a successful HTTP response.

        Args:
            request_id: ID returned from log_request
            status_code: HTTP status code
            response_body: Response body
            response_headers: Response headers
        """
        if request_id not in self._active_requests:
            return

        request_data = self._active_requests.pop(request_id)
        timing_ms = (time.time() - request_data["start_time"]) * 1000

        # Convert bytes to base64 for JSON serialization
        serializable_body = response_body
        if isinstance(response_body, bytes):
            serializable_body = {
                "_binary": True,
                "_base64": base64.b64encode(response_body).decode("utf-8"),
            }

        log_entry = RequestLog(
            method=request_data["method"],
            url=request_data["url"],
            path=request_data["path"],
            headers=request_data["headers"],
            params=request_data["params"],
            body=request_data["body"],
            response_status=status_code,
            response_body=serializable_body,
            response_headers=response_headers or {},
            timing_ms=timing_ms,
        )

        self.session.logs.append(log_entry)
        self._rotate_logs_if_needed()

    def log_error(
        self,
        request_id: str,
        error: str,
        status_code: int | None = None,
    ) -> None:
        """
        Log an HTTP request error.

        Args:
            request_id: ID returned from log_request
            error: Error message
            status_code: HTTP status code if available
        """
        if request_id not in self._active_requests:
            return

        request_data = self._active_requests.pop(request_id)
        timing_ms = (time.time() - request_data["start_time"]) * 1000

        log_entry = RequestLog(
            method=request_data["method"],
            url=request_data["url"],
            path=request_data["path"],
            headers=request_data["headers"],
            params=request_data["params"],
            body=request_data["body"],
            response_status=status_code,
            timing_ms=timing_ms,
            error=error,
        )

        self.session.logs.append(log_entry)
        self._rotate_logs_if_needed()

    def log_chunk_fetch(self, chunk: bytes) -> None:
        """Log a chunk from streaming response.

        Args:
            chunk: Binary chunk data from streaming response
        """
        self.session.chunk_logs.append(chunk)

    def save(self) -> None:
        """Write the current session to the log file.

        Includes both rotated logs and current active logs to ensure
        no data loss during bounded logging.
        """
        # Combine rotated logs with current logs for complete session
        all_logs = self._rotated_logs + self.session.logs

        # Create a temporary session with all logs for serialization
        session_data = self.session.model_dump(mode="json")
        session_data["logs"] = [log.model_dump(mode="json") for log in all_logs]

        with open(self.log_file, "w") as f:
            json.dump(session_data, f, indent=2, default=str)

    def close(self) -> None:
        """Finalize and save the logging session."""
        self.save()


class NullLogger:
    """No-op logger for when logging is disabled."""

    def log_request(self, *args, **kwargs) -> str:
        """No-op log_request."""
        return ""

    def log_response(
        self,
        request_id: str,
        status_code: int,
        response_body: Any | None = None,
        response_headers: Dict[str, str] | None = None,
    ) -> None:
        """No-op log_response."""
        pass

    def log_error(self, *args, **kwargs) -> None:
        """No-op log_error."""
        pass

    def log_chunk_fetch(self, chunk: bytes) -> None:
        """No-op chunk logging for production."""
        pass

    def save(self) -> None:
        """No-op save."""
        pass

    def close(self) -> None:
        """No-op close."""
        pass
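For orientation, a minimal usage sketch of the RequestLogger defined above. The constructor arguments and method signatures come from the code itself; the connector name, URL, headers, and parameter values are illustrative placeholders.

request_logger = RequestLogger(connector_name="klaviyo", max_logs=1000)

request_id = request_logger.log_request(
    method="GET",
    url="https://a.klaviyo.com/api/profiles",   # placeholder URL
    path="/api/profiles",
    headers={"Authorization": "Klaviyo-API-Key pk_example"},  # stored as "[REDACTED]"
    params={"page[size]": 20},
)
request_logger.log_response(request_id, status_code=200, response_body={"data": []})
request_logger.close()  # flushes rotated + active logs to .logs/session_<timestamp>.json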
airbyte_agent_klaviyo/_vendored/connector_sdk/logging/types.py
@@ -0,0 +1,93 @@
"""Type definitions for request/response logging."""

import base64
from datetime import UTC, datetime
from typing import Any, Dict, List

from pydantic import BaseModel, ConfigDict, Field, field_serializer, field_validator


def _utc_now() -> datetime:
    """Get current UTC datetime (timezone-aware)."""
    return datetime.now(UTC)


def _encode_bytes(v: bytes) -> dict:
    """Encode bytes as base64 for JSON serialization."""
    return {"_binary": True, "_base64": base64.b64encode(v).decode("utf-8")}


class RequestLog(BaseModel):
    """Captures a single HTTP request/response interaction."""

    model_config = ConfigDict()

    timestamp: datetime = Field(default_factory=_utc_now)
    method: str
    url: str
    path: str
    headers: Dict[str, str] = Field(default_factory=dict)
    params: Dict[str, Any] | None = None
    body: Any | None = None
    response_status: int | None = None
    response_body: Any | None = None
    response_headers: Dict[str, str] = Field(default_factory=dict)
    timing_ms: float | None = None
    error: str | None = None

    @field_serializer("timestamp")
    def serialize_datetime(self, value: datetime) -> str:
        return value.isoformat()


class LogSession(BaseModel):
    """Collection of request logs with session metadata.

    When max_logs is set, the session will maintain a bounded buffer of recent logs.
    Older logs should be flushed to disk before being discarded (handled by RequestLogger).
    """

    model_config = ConfigDict()

    session_id: str
    started_at: datetime = Field(default_factory=_utc_now)
    connector_name: str | None = None
    logs: List[RequestLog] = Field(default_factory=list)
    max_logs: int | None = Field(
        default=10000,
        description="Maximum number of logs to keep in memory. "
        "When limit is reached, oldest logs should be flushed before removal. "
        "Set to None for unlimited (not recommended for production).",
    )
    chunk_logs: List[bytes] = Field(
        default_factory=list,
        description="Captured chunks from streaming responses. Each chunk is logged when log_chunk_fetch() is called.",
    )

    @field_validator("chunk_logs", mode="before")
    @classmethod
    def decode_chunk_logs(cls, v: Any) -> List[bytes]:
        """Decode chunk_logs from JSON representation back to bytes."""
        if v is None or v == []:
            return []
        if isinstance(v, list):
            result = []
            for item in v:
                if isinstance(item, bytes):
                    result.append(item)
                elif isinstance(item, dict) and item.get("_binary"):
                    # Decode from {"_binary": True, "_base64": "..."} format
                    result.append(base64.b64decode(item["_base64"]))
                else:
                    result.append(item)
            return result
        return v

    @field_serializer("started_at")
    def serialize_datetime(self, value: datetime) -> str:
        return value.isoformat()

    @field_serializer("chunk_logs")
    def serialize_chunk_logs(self, value: List[bytes]) -> List[dict]:
        """Serialize bytes chunks as base64 for JSON."""
        return [_encode_bytes(chunk) for chunk in value]
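A brief sketch of how these Pydantic models round-trip through JSON, based only on the serializers and validators above; the session_id and chunk bytes are made-up values.

session = LogSession(session_id="demo-session", connector_name="klaviyo")
session.chunk_logs.append(b"\x00\x01binary chunk")

payload = session.model_dump(mode="json")
# serialize_chunk_logs() emits {"_binary": True, "_base64": "..."} dicts
assert payload["chunk_logs"][0]["_binary"] is True

restored = LogSession.model_validate(payload)
# decode_chunk_logs() converts the dicts back to bytes during validation
assert restored.chunk_logs[0] == b"\x00\x01binary chunk"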
airbyte_agent_klaviyo/_vendored/connector_sdk/observability/__init__.py
@@ -0,0 +1,11 @@
"""Shared observability components for logging and telemetry."""

from .models import OperationMetadata
from .redactor import DataRedactor
from .session import ObservabilitySession

__all__ = [
    "DataRedactor",
    "ObservabilitySession",
    "OperationMetadata",
]
airbyte_agent_klaviyo/_vendored/connector_sdk/observability/config.py
@@ -0,0 +1,179 @@
"""Unified configuration for connector-sdk."""

import logging
import os
import tempfile
import uuid
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

import yaml

logger = logging.getLogger(__name__)

# New config location
CONFIG_DIR = Path.home() / ".airbyte" / "connector-sdk"
CONFIG_PATH = CONFIG_DIR / "config.yaml"

# Legacy file locations (for migration)
LEGACY_USER_ID_PATH = Path.home() / ".airbyte" / "ai_sdk_user_id"
LEGACY_INTERNAL_MARKER_PATH = Path.home() / ".airbyte" / "internal_user"


@dataclass
class SDKConfig:
    """Connector SDK configuration."""

    user_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    is_internal_user: bool = False

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for YAML serialization."""
        return {
            "user_id": self.user_id,
            "is_internal_user": self.is_internal_user,
        }


def _delete_legacy_files() -> None:
    """
    Delete legacy config files after successful migration.

    Removes:
    - ~/.airbyte/ai_sdk_user_id
    - ~/.airbyte/internal_user
    """
    for legacy_path in [LEGACY_USER_ID_PATH, LEGACY_INTERNAL_MARKER_PATH]:
        try:
            if legacy_path.exists():
                legacy_path.unlink()
                logger.debug(f"Deleted legacy config file: {legacy_path}")
        except Exception as e:
            logger.debug(f"Could not delete legacy file {legacy_path}: {e}")


def _migrate_legacy_config() -> SDKConfig | None:
    """
    Migrate from legacy file-based config to new YAML format.

    Reads from:
    - ~/.airbyte/ai_sdk_user_id (user_id)
    - ~/.airbyte/internal_user (is_internal_user marker)

    Returns SDKConfig if migration was successful, None otherwise.
    """
    user_id = None
    is_internal = False

    # Try to read legacy user_id
    try:
        if LEGACY_USER_ID_PATH.exists():
            user_id = LEGACY_USER_ID_PATH.read_text().strip()
            if not user_id:
                user_id = None
    except Exception:
        pass

    # Check legacy internal_user marker
    try:
        is_internal = LEGACY_INTERNAL_MARKER_PATH.exists()
    except Exception:
        pass

    if user_id or is_internal:
        return SDKConfig(
            user_id=user_id or str(uuid.uuid4()),
            is_internal_user=is_internal,
        )

    return None


def load_config() -> SDKConfig:
    """
    Load SDK configuration from config file.

    Checks (in order):
    1. New config file at ~/.airbyte/connector-sdk/config.yaml
    2. Legacy files at ~/.airbyte/ai_sdk_user_id and ~/.airbyte/internal_user
    3. Creates new config with generated user_id if nothing exists

    Environment variable AIRBYTE_INTERNAL_USER can override is_internal_user.

    Returns:
        SDKConfig with user_id and is_internal_user
    """
    config = None

    # Try to load from new config file
    try:
        if CONFIG_PATH.exists():
            content = CONFIG_PATH.read_text()
            data = yaml.safe_load(content) or {}
            config = SDKConfig(
                user_id=data.get("user_id", str(uuid.uuid4())),
                is_internal_user=data.get("is_internal_user", False),
            )
            # Always clean up legacy files if they exist (even if new config exists)
            _delete_legacy_files()
    except Exception as e:
        logger.debug(f"Could not load config from {CONFIG_PATH}: {e}")

    # Try to migrate from legacy files if new config doesn't exist
    if config is None:
        config = _migrate_legacy_config()
        if config:
            # Save migrated config to new location
            try:
                save_config(config)
                logger.debug("Migrated legacy config to new location")
                # Delete legacy files after successful migration
                _delete_legacy_files()
            except Exception as e:
                logger.debug(f"Could not save migrated config: {e}")

    # Create new config if nothing exists
    if config is None:
        config = SDKConfig()
        try:
            save_config(config)
        except Exception as e:
            logger.debug(f"Could not save new config: {e}")

    # Environment variable override for is_internal_user
    env_value = os.getenv("AIRBYTE_INTERNAL_USER", "").lower()
    if env_value in ("true", "1", "yes"):
        config.is_internal_user = True
    elif env_value:
        # Any other non-empty value (including "false", "0", "no") defaults to False
        config.is_internal_user = False

    return config


def save_config(config: SDKConfig) -> None:
    """
    Save SDK configuration to config file.

    Creates the config directory if it doesn't exist.
    Uses atomic writes to prevent corruption from concurrent access.

    Args:
        config: SDKConfig to save
    """
    CONFIG_DIR.mkdir(parents=True, exist_ok=True)

    # Use atomic write: write to temp file then rename (atomic on POSIX)
    fd, temp_path = tempfile.mkstemp(dir=CONFIG_DIR, suffix=".tmp")
    try:
        with os.fdopen(fd, "w") as f:
            yaml.dump(config.to_dict(), f, default_flow_style=False)
        os.rename(temp_path, CONFIG_PATH)
    except Exception:
        # Clean up temp file on failure
        try:
            os.unlink(temp_path)
        except OSError:
            pass
        raise
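A short sketch of the intended load/save cycle, inferred from the functions above; the environment value is illustrative, and the config is written under the user's home directory.

import os

# The documented env override takes precedence over the stored flag.
os.environ["AIRBYTE_INTERNAL_USER"] = "true"

config = load_config()            # reads ~/.airbyte/connector-sdk/config.yaml,
                                  # migrating legacy files if present
print(config.user_id)             # persisted UUID, stable across runs
print(config.is_internal_user)    # True because of the env override

save_config(config)               # atomic write back to config.yaml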
airbyte_agent_klaviyo/_vendored/connector_sdk/observability/models.py
@@ -0,0 +1,19 @@
"""Shared operation metadata models."""

from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict


@dataclass
class OperationMetadata:
    """Shared operation metadata."""

    entity: str
    action: str
    timestamp: datetime
    timing_ms: float | None = None
    status_code: int | None = None
    error_type: str | None = None
    error_message: str | None = None
    params: Dict[str, Any] | None = None
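A small illustrative construction of OperationMetadata; the entity and action names are placeholders, not values used by the SDK.

from datetime import UTC, datetime

meta = OperationMetadata(
    entity="profiles",            # placeholder entity name
    action="list",                # placeholder action name
    timestamp=datetime.now(UTC),
    timing_ms=42.0,
    status_code=200,
)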
airbyte_agent_klaviyo/_vendored/connector_sdk/observability/redactor.py
@@ -0,0 +1,81 @@
"""Shared redaction logic for both logging and telemetry."""

from typing import Any, Dict
from urllib.parse import parse_qs, urlencode, urlparse, urlunparse


class DataRedactor:
    """Shared redaction logic for both logging and telemetry."""

    SENSITIVE_HEADER_PATTERNS = [
        "authorization",
        "api-key",
        "x-api-key",
        "token",
        "bearer",
        "secret",
        "password",
        "credential",
    ]

    SENSITIVE_PARAM_PATTERNS = [
        "password",
        "secret",
        "api_key",
        "apikey",
        "token",
        "credentials",
        "auth",
        "key",
    ]

    @staticmethod
    def redact_headers(headers: Dict[str, str]) -> Dict[str, str]:
        """Redact sensitive headers."""
        redacted = {}
        for key, value in headers.items():
            if any(pattern in key.lower() for pattern in DataRedactor.SENSITIVE_HEADER_PATTERNS):
                redacted[key] = "***REDACTED***"
            else:
                redacted[key] = value
        return redacted

    @staticmethod
    def redact_params(params: Dict[str, Any]) -> Dict[str, Any]:
        """Redact sensitive parameters."""
        redacted = {}
        for key, value in params.items():
            if any(pattern in key.lower() for pattern in DataRedactor.SENSITIVE_PARAM_PATTERNS):
                redacted[key] = "***REDACTED***"
            else:
                redacted[key] = value
        return redacted

    @staticmethod
    def redact_url(url: str) -> str:
        """Redact sensitive query params from URL."""
        parsed = urlparse(url)
        if not parsed.query:
            return url

        params = parse_qs(parsed.query)
        redacted_params = {}

        for key, values in params.items():
            if any(pattern in key.lower() for pattern in DataRedactor.SENSITIVE_PARAM_PATTERNS):
                redacted_params[key] = ["***REDACTED***"] * len(values)
            else:
                redacted_params[key] = values

        # Reconstruct URL with redacted params
        new_query = urlencode(redacted_params, doseq=True)
        return urlunparse(
            (
                parsed.scheme,
                parsed.netloc,
                parsed.path,
                parsed.params,
                new_query,
                parsed.fragment,
            )
        )
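A usage sketch for DataRedactor using only the static methods defined above; the URL and header values are invented examples.

url = "https://a.klaviyo.com/api/events?api_key=pk_live_123&cursor=abc"
safe_url = DataRedactor.redact_url(url)
# The api_key value is replaced with the redaction marker before the query
# string is re-encoded; non-sensitive params such as "cursor" pass through.

safe_headers = DataRedactor.redact_headers(
    {"Authorization": "Bearer token-123", "Accept": "application/json"}
)
# {"Authorization": "***REDACTED***", "Accept": "application/json"}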