amazon-ads-mcp 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- amazon_ads_mcp/__init__.py +11 -0
- amazon_ads_mcp/auth/__init__.py +33 -0
- amazon_ads_mcp/auth/base.py +211 -0
- amazon_ads_mcp/auth/hooks.py +172 -0
- amazon_ads_mcp/auth/manager.py +791 -0
- amazon_ads_mcp/auth/oauth_state_store.py +277 -0
- amazon_ads_mcp/auth/providers/__init__.py +14 -0
- amazon_ads_mcp/auth/providers/direct.py +393 -0
- amazon_ads_mcp/auth/providers/example_auth0.py.example +216 -0
- amazon_ads_mcp/auth/providers/openbridge.py +512 -0
- amazon_ads_mcp/auth/registry.py +146 -0
- amazon_ads_mcp/auth/secure_token_store.py +297 -0
- amazon_ads_mcp/auth/token_store.py +723 -0
- amazon_ads_mcp/config/__init__.py +5 -0
- amazon_ads_mcp/config/sampling.py +111 -0
- amazon_ads_mcp/config/settings.py +366 -0
- amazon_ads_mcp/exceptions.py +314 -0
- amazon_ads_mcp/middleware/__init__.py +11 -0
- amazon_ads_mcp/middleware/authentication.py +1474 -0
- amazon_ads_mcp/middleware/caching.py +177 -0
- amazon_ads_mcp/middleware/oauth.py +175 -0
- amazon_ads_mcp/middleware/sampling.py +112 -0
- amazon_ads_mcp/models/__init__.py +320 -0
- amazon_ads_mcp/models/amc_models.py +837 -0
- amazon_ads_mcp/models/api_responses.py +847 -0
- amazon_ads_mcp/models/base_models.py +215 -0
- amazon_ads_mcp/models/builtin_responses.py +496 -0
- amazon_ads_mcp/models/dsp_models.py +556 -0
- amazon_ads_mcp/models/stores_brands.py +610 -0
- amazon_ads_mcp/server/__init__.py +6 -0
- amazon_ads_mcp/server/__main__.py +6 -0
- amazon_ads_mcp/server/builtin_prompts.py +269 -0
- amazon_ads_mcp/server/builtin_tools.py +962 -0
- amazon_ads_mcp/server/file_routes.py +547 -0
- amazon_ads_mcp/server/html_templates.py +149 -0
- amazon_ads_mcp/server/mcp_server.py +327 -0
- amazon_ads_mcp/server/openapi_utils.py +158 -0
- amazon_ads_mcp/server/sampling_handler.py +251 -0
- amazon_ads_mcp/server/server_builder.py +751 -0
- amazon_ads_mcp/server/sidecar_loader.py +178 -0
- amazon_ads_mcp/server/transform_executor.py +827 -0
- amazon_ads_mcp/tools/__init__.py +22 -0
- amazon_ads_mcp/tools/cache_management.py +105 -0
- amazon_ads_mcp/tools/download_tools.py +267 -0
- amazon_ads_mcp/tools/identity.py +236 -0
- amazon_ads_mcp/tools/oauth.py +598 -0
- amazon_ads_mcp/tools/profile.py +150 -0
- amazon_ads_mcp/tools/profile_listing.py +285 -0
- amazon_ads_mcp/tools/region.py +320 -0
- amazon_ads_mcp/tools/region_identity.py +175 -0
- amazon_ads_mcp/utils/__init__.py +6 -0
- amazon_ads_mcp/utils/async_compat.py +215 -0
- amazon_ads_mcp/utils/errors.py +452 -0
- amazon_ads_mcp/utils/export_content_type_resolver.py +249 -0
- amazon_ads_mcp/utils/export_download_handler.py +579 -0
- amazon_ads_mcp/utils/header_resolver.py +81 -0
- amazon_ads_mcp/utils/http/__init__.py +56 -0
- amazon_ads_mcp/utils/http/circuit_breaker.py +127 -0
- amazon_ads_mcp/utils/http/client_manager.py +329 -0
- amazon_ads_mcp/utils/http/request.py +207 -0
- amazon_ads_mcp/utils/http/resilience.py +512 -0
- amazon_ads_mcp/utils/http/resilient_client.py +195 -0
- amazon_ads_mcp/utils/http/retry.py +76 -0
- amazon_ads_mcp/utils/http_client.py +873 -0
- amazon_ads_mcp/utils/media/__init__.py +21 -0
- amazon_ads_mcp/utils/media/negotiator.py +243 -0
- amazon_ads_mcp/utils/media/types.py +199 -0
- amazon_ads_mcp/utils/openapi/__init__.py +16 -0
- amazon_ads_mcp/utils/openapi/json.py +55 -0
- amazon_ads_mcp/utils/openapi/loader.py +263 -0
- amazon_ads_mcp/utils/openapi/refs.py +46 -0
- amazon_ads_mcp/utils/region_config.py +200 -0
- amazon_ads_mcp/utils/response_wrapper.py +171 -0
- amazon_ads_mcp/utils/sampling_helpers.py +156 -0
- amazon_ads_mcp/utils/sampling_wrapper.py +173 -0
- amazon_ads_mcp/utils/security.py +630 -0
- amazon_ads_mcp/utils/tool_naming.py +137 -0
- amazon_ads_mcp-0.2.7.dist-info/METADATA +664 -0
- amazon_ads_mcp-0.2.7.dist-info/RECORD +82 -0
- amazon_ads_mcp-0.2.7.dist-info/WHEEL +4 -0
- amazon_ads_mcp-0.2.7.dist-info/entry_points.txt +3 -0
- amazon_ads_mcp-0.2.7.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,579 @@
|
|
|
1
|
+
"""Export and report download handler for Amazon Ads API.
|
|
2
|
+
|
|
3
|
+
This module provides functionality for downloading and storing Amazon Ads API
|
|
4
|
+
exports and reports in a structured local directory hierarchy.
|
|
5
|
+
|
|
6
|
+
Key Features:
|
|
7
|
+
- Organized file storage with timestamped naming
|
|
8
|
+
- Support for multiple export types (campaigns, adgroups, ads, targets)
|
|
9
|
+
- Metadata storage alongside downloaded files
|
|
10
|
+
- Automatic file extension detection
|
|
11
|
+
- Resource type-based directory organization
|
|
12
|
+
|
|
13
|
+
Directory Structure:
|
|
14
|
+
data/
|
|
15
|
+
├── exports/
|
|
16
|
+
│ ├── campaigns/
|
|
17
|
+
│ ├── adgroups/
|
|
18
|
+
│ ├── ads/
|
|
19
|
+
│ └── targets/
|
|
20
|
+
├── reports/
|
|
21
|
+
│ ├── brandmetrics/
|
|
22
|
+
│ └── general/
|
|
23
|
+
└── downloads/
|
|
24
|
+
└── general/
|
|
25
|
+
|
|
26
|
+
Dependencies:
|
|
27
|
+
- httpx: Async HTTP client for downloads
|
|
28
|
+
- pathlib: Path manipulation
|
|
29
|
+
- asyncio: Asynchronous operations
|
|
30
|
+
|
|
31
|
+
Environment Variables:
|
|
32
|
+
- AMAZON_ADS_DOWNLOAD_DIR: Custom download directory path
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
import gzip
|
|
36
|
+
import json
|
|
37
|
+
import logging
|
|
38
|
+
import re
|
|
39
|
+
from datetime import datetime
|
|
40
|
+
from pathlib import Path
|
|
41
|
+
from typing import Any
|
|
42
|
+
from urllib.parse import urlparse
|
|
43
|
+
|
|
44
|
+
import httpx
|
|
45
|
+
|
|
46
|
+
# S3 downloads use plain httpx client, not authenticated client
|
|
47
|
+
|
|
48
|
+
logger = logging.getLogger(__name__)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class ExportDownloadHandler:
|
|
52
|
+
"""Handles downloading and storing Amazon Ads API exports and reports.
|
|
53
|
+
|
|
54
|
+
This class provides a comprehensive solution for managing export downloads
|
|
55
|
+
with organized storage, metadata tracking, and automatic file organization.
|
|
56
|
+
|
|
57
|
+
Downloads are organized in a hierarchical structure:
|
|
58
|
+
data/<resource_type>/<sub_type>/<timestamp>_<filename>
|
|
59
|
+
|
|
60
|
+
Examples:
|
|
61
|
+
- data/exports/campaigns/20250108_153045_campaign_export.csv
|
|
62
|
+
- data/reports/brandmetrics/20250108_160000_brand_lift_report.json
|
|
63
|
+
- data/exports/adgroups/20250108_170000_adgroup_export.csv
|
|
64
|
+
|
|
65
|
+
The handler automatically:
|
|
66
|
+
- Creates directory structures as needed
|
|
67
|
+
- Generates timestamped filenames
|
|
68
|
+
- Detects file extensions from content-type headers
|
|
69
|
+
- Stores metadata alongside downloaded files
|
|
70
|
+
- Organizes files by resource type and subtype
|
|
71
|
+
|
|
72
|
+
:param base_dir: Base directory for all downloads (default: ./data)
|
|
73
|
+
:type base_dir: Path | None
|
|
74
|
+
"""
|
|
75
|
+
|
|
76
|
+
def __init__(self, base_dir: Path | None = None):
    """Initialize the download handler.

    Creates the base directory (including any missing parent
    directories) and prepares the handler for managing export
    downloads.

    :param base_dir: Base directory for downloads (default: ./data)
    :type base_dir: Path | None
    """
    self.base_dir = base_dir or Path.cwd() / "data"
    # parents=True so a nested path (e.g. a deep AMAZON_ADS_DOWNLOAD_DIR)
    # does not raise FileNotFoundError when its parent is missing.
    self.base_dir.mkdir(parents=True, exist_ok=True)
    # Flag is set here but never toggled in this class; presumably
    # reserved for lazy HTTP-client setup — TODO confirm with callers.
    self.client_initialized = False
    logger.info(
        f"Download handler initialized with base directory: {self.base_dir}"
    )
|
|
91
|
+
|
|
92
|
+
def get_profile_base_dir(self, profile_id: str | None) -> Path:
|
|
93
|
+
"""Get the base directory for a profile.
|
|
94
|
+
|
|
95
|
+
For profile-scoped storage: data/profiles/{profile_id}/
|
|
96
|
+
For legacy storage (no profile): data/
|
|
97
|
+
|
|
98
|
+
:param profile_id: Profile ID for scoping, or None for legacy mode
|
|
99
|
+
:type profile_id: str | None
|
|
100
|
+
:return: Base directory for the profile
|
|
101
|
+
:rtype: Path
|
|
102
|
+
"""
|
|
103
|
+
if profile_id:
|
|
104
|
+
profile_dir = self.base_dir / "profiles" / profile_id
|
|
105
|
+
profile_dir.mkdir(parents=True, exist_ok=True)
|
|
106
|
+
return profile_dir
|
|
107
|
+
return self.base_dir
|
|
108
|
+
|
|
109
|
+
def get_resource_path(
|
|
110
|
+
self,
|
|
111
|
+
url: str,
|
|
112
|
+
export_type: str | None = None,
|
|
113
|
+
profile_id: str | None = None,
|
|
114
|
+
) -> Path:
|
|
115
|
+
"""Determine the resource path from URL and export type.
|
|
116
|
+
|
|
117
|
+
Analyzes the URL structure to determine the appropriate directory
|
|
118
|
+
for storing the downloaded file. Creates the directory structure
|
|
119
|
+
if it doesn't exist.
|
|
120
|
+
|
|
121
|
+
When profile_id is provided, files are stored under:
|
|
122
|
+
data/profiles/{profile_id}/{resource_type}/{sub_type}/
|
|
123
|
+
|
|
124
|
+
When profile_id is None (legacy mode):
|
|
125
|
+
data/{resource_type}/{sub_type}/
|
|
126
|
+
|
|
127
|
+
:param url: The URL being accessed
|
|
128
|
+
:type url: str
|
|
129
|
+
:param export_type: Optional explicit export type (campaign, adgroup, etc.)
|
|
130
|
+
:type export_type: str | None
|
|
131
|
+
:param profile_id: Optional profile ID for scoped storage
|
|
132
|
+
:type profile_id: str | None
|
|
133
|
+
:return: Path object for the resource directory
|
|
134
|
+
:rtype: Path
|
|
135
|
+
"""
|
|
136
|
+
parsed = urlparse(url)
|
|
137
|
+
path_parts = parsed.path.strip("/").split("/")
|
|
138
|
+
|
|
139
|
+
# Check for S3 offline report storage
|
|
140
|
+
# Validate hostname is legitimate AWS domain
|
|
141
|
+
hostname = (parsed.hostname or "").lower()
|
|
142
|
+
is_aws_domain = hostname.endswith(".amazonaws.com") or hostname == "amazonaws.com"
|
|
143
|
+
if hostname and is_aws_domain:
|
|
144
|
+
if any(
|
|
145
|
+
pattern in hostname
|
|
146
|
+
for pattern in [
|
|
147
|
+
"offline-report-storage",
|
|
148
|
+
"report-storage",
|
|
149
|
+
"s3",
|
|
150
|
+
]
|
|
151
|
+
):
|
|
152
|
+
resource_type = "reports"
|
|
153
|
+
sub_type = "s3-reports"
|
|
154
|
+
else:
|
|
155
|
+
resource_type = "downloads"
|
|
156
|
+
sub_type = "s3"
|
|
157
|
+
# Determine resource type from path
|
|
158
|
+
elif "exports" in path_parts:
|
|
159
|
+
resource_type = "exports"
|
|
160
|
+
# Try to determine sub-type from export_type or URL
|
|
161
|
+
if export_type:
|
|
162
|
+
sub_type = export_type.lower()
|
|
163
|
+
else:
|
|
164
|
+
# Default sub-types based on common patterns
|
|
165
|
+
sub_type = "general"
|
|
166
|
+
elif "reports" in path_parts:
|
|
167
|
+
resource_type = "reports"
|
|
168
|
+
# Extract report type if available
|
|
169
|
+
idx = path_parts.index("reports")
|
|
170
|
+
if idx + 1 < len(path_parts):
|
|
171
|
+
sub_type = path_parts[idx + 1].lower()
|
|
172
|
+
else:
|
|
173
|
+
sub_type = "general"
|
|
174
|
+
# Check filename patterns in path
|
|
175
|
+
elif parsed.path and any(
|
|
176
|
+
pattern in parsed.path.lower() for pattern in ["report-", ".json.gz"]
|
|
177
|
+
):
|
|
178
|
+
resource_type = "reports"
|
|
179
|
+
sub_type = "async"
|
|
180
|
+
elif "brandMetrics" in parsed.path:
|
|
181
|
+
resource_type = "reports"
|
|
182
|
+
sub_type = "brandmetrics"
|
|
183
|
+
# Use export_type hint if provided
|
|
184
|
+
elif export_type:
|
|
185
|
+
if export_type.lower() in ["report", "reports"]:
|
|
186
|
+
resource_type = "reports"
|
|
187
|
+
sub_type = export_type.lower()
|
|
188
|
+
elif export_type.lower() in ["export", "exports"]:
|
|
189
|
+
resource_type = "exports"
|
|
190
|
+
sub_type = export_type.lower()
|
|
191
|
+
else:
|
|
192
|
+
resource_type = "downloads"
|
|
193
|
+
sub_type = export_type.lower()
|
|
194
|
+
else:
|
|
195
|
+
# Generic download
|
|
196
|
+
resource_type = "downloads"
|
|
197
|
+
sub_type = path_parts[0] if path_parts else "general"
|
|
198
|
+
|
|
199
|
+
# Get the appropriate base directory (profile-scoped or legacy)
|
|
200
|
+
base = self.get_profile_base_dir(profile_id)
|
|
201
|
+
|
|
202
|
+
# Create the directory structure
|
|
203
|
+
resource_path = base / resource_type / sub_type
|
|
204
|
+
resource_path.mkdir(parents=True, exist_ok=True)
|
|
205
|
+
|
|
206
|
+
return resource_path
|
|
207
|
+
|
|
208
|
+
def _infer_filename_and_type(
|
|
209
|
+
self,
|
|
210
|
+
url: str,
|
|
211
|
+
content_disposition: str | None,
|
|
212
|
+
content_type: str | None,
|
|
213
|
+
export_id: str,
|
|
214
|
+
) -> tuple[str, bool]:
|
|
215
|
+
"""Infer filename and whether content is gzipped.
|
|
216
|
+
|
|
217
|
+
:param url: The download URL
|
|
218
|
+
:param content_disposition: Content-Disposition header value
|
|
219
|
+
:param content_type: Content-Type header value
|
|
220
|
+
:param export_id: Export ID for fallback naming
|
|
221
|
+
:return: tuple of (filename, is_gzipped)
|
|
222
|
+
"""
|
|
223
|
+
# Try Content-Disposition first
|
|
224
|
+
if content_disposition and "filename=" in content_disposition:
|
|
225
|
+
match = re.search(r'filename="?([^\"]+)"?', content_disposition)
|
|
226
|
+
if match:
|
|
227
|
+
filename = match.group(1)
|
|
228
|
+
is_gzipped = (
|
|
229
|
+
filename.endswith(".gz") or "gzip" in (content_type or "").lower()
|
|
230
|
+
)
|
|
231
|
+
return filename, is_gzipped
|
|
232
|
+
|
|
233
|
+
# Try to get filename from URL
|
|
234
|
+
parsed = urlparse(url)
|
|
235
|
+
path_parts = parsed.path.rstrip("/").split("/")
|
|
236
|
+
if path_parts and path_parts[-1]:
|
|
237
|
+
filename = path_parts[-1]
|
|
238
|
+
# Check if it looks like a filename (has extension)
|
|
239
|
+
if "." in filename:
|
|
240
|
+
is_gzipped = (
|
|
241
|
+
filename.endswith(".gz") or "gzip" in (content_type or "").lower()
|
|
242
|
+
)
|
|
243
|
+
return filename, is_gzipped
|
|
244
|
+
|
|
245
|
+
# Fall back to content-type based extension
|
|
246
|
+
is_gzipped = False
|
|
247
|
+
if content_type:
|
|
248
|
+
content_type_lower = content_type.lower()
|
|
249
|
+
if "gzip" in content_type_lower or "x-gzip" in content_type_lower:
|
|
250
|
+
extension = ".json.gz" # Assume JSON inside gzip
|
|
251
|
+
is_gzipped = True
|
|
252
|
+
elif "json" in content_type_lower:
|
|
253
|
+
extension = ".json"
|
|
254
|
+
elif "xml" in content_type_lower:
|
|
255
|
+
extension = ".xml"
|
|
256
|
+
elif "csv" in content_type_lower:
|
|
257
|
+
extension = ".csv"
|
|
258
|
+
elif "parquet" in content_type_lower:
|
|
259
|
+
extension = ".parquet"
|
|
260
|
+
elif "octet-stream" in content_type_lower:
|
|
261
|
+
# For binary/octet-stream, try to infer from URL
|
|
262
|
+
if ".json.gz" in url:
|
|
263
|
+
extension = ".json.gz"
|
|
264
|
+
is_gzipped = True
|
|
265
|
+
elif ".csv.gz" in url:
|
|
266
|
+
extension = ".csv.gz"
|
|
267
|
+
is_gzipped = True
|
|
268
|
+
elif ".gz" in url:
|
|
269
|
+
extension = ".gz"
|
|
270
|
+
is_gzipped = True
|
|
271
|
+
else:
|
|
272
|
+
extension = ".bin"
|
|
273
|
+
else:
|
|
274
|
+
extension = ".bin" # Binary/unknown
|
|
275
|
+
else:
|
|
276
|
+
# No content-type, try URL
|
|
277
|
+
if ".json.gz" in url:
|
|
278
|
+
extension = ".json.gz"
|
|
279
|
+
is_gzipped = True
|
|
280
|
+
elif ".gz" in url:
|
|
281
|
+
extension = ".gz"
|
|
282
|
+
is_gzipped = True
|
|
283
|
+
else:
|
|
284
|
+
extension = ".bin"
|
|
285
|
+
|
|
286
|
+
# Generate filename
|
|
287
|
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
288
|
+
# Truncate export_id if too long
|
|
289
|
+
clean_id = export_id[:8] if len(export_id) > 8 else export_id
|
|
290
|
+
filename = f"{timestamp}_export_{clean_id}{extension}"
|
|
291
|
+
|
|
292
|
+
return filename, is_gzipped
|
|
293
|
+
|
|
294
|
+
def generate_filename(
|
|
295
|
+
self,
|
|
296
|
+
original_name: str | None = None,
|
|
297
|
+
export_id: str | None = None,
|
|
298
|
+
extension: str = ".csv",
|
|
299
|
+
) -> str:
|
|
300
|
+
"""Generate a timestamped filename.
|
|
301
|
+
|
|
302
|
+
Creates a unique filename with timestamp prefix for organizing
|
|
303
|
+
downloaded files chronologically.
|
|
304
|
+
|
|
305
|
+
:param original_name: Original filename if available
|
|
306
|
+
:type original_name: str | None
|
|
307
|
+
:param export_id: Export ID to include in filename
|
|
308
|
+
:type export_id: str | None
|
|
309
|
+
:param extension: File extension (default: .csv)
|
|
310
|
+
:type extension: str
|
|
311
|
+
:return: Timestamped filename
|
|
312
|
+
:rtype: str
|
|
313
|
+
"""
|
|
314
|
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
315
|
+
|
|
316
|
+
if original_name:
|
|
317
|
+
# Clean the original name
|
|
318
|
+
name = Path(original_name).stem
|
|
319
|
+
ext = Path(original_name).suffix or extension
|
|
320
|
+
return f"{timestamp}_{name}{ext}"
|
|
321
|
+
elif export_id:
|
|
322
|
+
# Use export ID in filename
|
|
323
|
+
# Truncate if too long
|
|
324
|
+
clean_id = export_id[:20] if len(export_id) > 20 else export_id
|
|
325
|
+
return f"{timestamp}_export_{clean_id}{extension}"
|
|
326
|
+
else:
|
|
327
|
+
return f"{timestamp}_download{extension}"
|
|
328
|
+
|
|
329
|
+
async def download_export(
    self,
    export_url: str,
    export_id: str,
    export_type: str | None = None,
    metadata: dict[str, Any] | None = None,
    profile_id: str | None = None,
) -> Path:
    """Download an export file and store it locally.

    Fetches the export with a plain (unauthenticated) httpx client,
    writes the raw bytes into the directory chosen by
    :meth:`get_resource_path`, and — when the payload is gzipped and
    the inferred filename ends in ``.gz`` — also writes a decompressed
    sibling file and returns that path instead of the ``.gz`` original.
    When ``metadata`` is given, an enriched shallow copy is saved next
    to the returned file as ``<name>.meta.json``.

    When profile_id is provided, files are stored under:
        data/profiles/{profile_id}/{resource_type}/{sub_type}/

    NOTE(review): the entire body is buffered in memory
    (``response.content``) and file writes are synchronous, so very
    large exports will block the event loop — confirm acceptable for
    expected export sizes.

    :param export_url: URL to download the export from
    :type export_url: str
    :param export_id: Export ID for identification
    :type export_id: str
    :param export_type: Type of export (campaign, adgroup, etc.)
    :type export_type: str | None
    :param metadata: Optional metadata to store alongside the file
    :type metadata: dict[str, Any] | None
    :param profile_id: Optional profile ID for scoped storage
    :type profile_id: str | None
    :return: Path to the downloaded (possibly decompressed) file
    :rtype: Path
    :raises httpx.HTTPStatusError: When download fails
    :raises Exception: When file operations fail
    """
    # Determine where to save (profile-scoped or legacy)
    resource_path = self.get_resource_path(export_url, export_type, profile_id)

    # Use plain httpx client for S3 URLs (they don't need auth headers)
    async with httpx.AsyncClient(timeout=httpx.Timeout(60.0)) as client:
        response = await client.get(export_url)
        response.raise_for_status()

        # Infer filename + gzip state from headers and the URL.
        cd = response.headers.get("content-disposition")
        ct = response.headers.get("content-type")
        filename, is_gzipped = self._infer_filename_and_type(
            export_url, cd, ct, export_id
        )

        # file_path is the raw download; final_path may be swapped to
        # the decompressed sibling below.
        file_path = resource_path / filename
        final_path = file_path

        # Always persist the original (possibly compressed) bytes.
        content_bytes = response.content
        with open(file_path, "wb") as f:
            f.write(content_bytes)

        # If gzipped, also decompress to base (without .gz)
        if is_gzipped and filename.endswith(".gz"):
            try:
                decompressed = gzip.decompress(content_bytes)
                base_name = filename[:-3]  # strip .gz
                decompressed_path = resource_path / base_name
                with open(decompressed_path, "wb") as out:
                    out.write(decompressed)
                final_path = decompressed_path
                logger.info(
                    f"Downloaded and decompressed export to: {final_path} (original: {file_path})"
                )
            except Exception as e:
                # Best-effort: on bad gzip data the raw file is kept
                # and returned as-is.
                logger.warning(
                    f"Failed to decompress gzip content, keeping original: {e}"
                )
        else:
            logger.info(f"Downloaded export to: {final_path}")

        # Save metadata if provided
        if metadata:
            # with_suffix replaces only the last suffix, e.g.
            # "x.json" -> "x.meta.json".
            meta_path = final_path.with_suffix(".meta.json")
            metadata = dict(metadata)  # shallow copy to avoid side effects
            metadata["download_timestamp"] = datetime.now().isoformat()
            metadata["export_id"] = export_id
            metadata["export_type"] = export_type
            metadata["original_url"] = export_url
            metadata["file_size"] = len(content_bytes)
            metadata["original_filename"] = filename
            metadata["content_type"] = ct
            metadata["gzipped"] = is_gzipped
            metadata["saved_path"] = str(final_path)
            if profile_id:
                metadata["profile_id"] = profile_id

            with open(meta_path, "w", encoding="utf-8") as f:
                json.dump(metadata, f, indent=2)
            logger.debug(f"Saved metadata to: {meta_path}")

        return final_path
|
|
425
|
+
|
|
426
|
+
async def handle_export_response(
    self,
    export_response: dict[str, Any],
    export_type: str | None = None,
    profile_id: str | None = None,
) -> Path | None:
    """Handle an export response, downloading if ready.

    Inspects the status of a GetExport API response: COMPLETED exports
    with a URL are downloaded immediately; PROCESSING, FAILED, missing
    URL, and unknown statuses are logged and yield ``None``.

    :param export_response: Response from GetExport API
    :type export_response: dict[str, Any]
    :param export_type: Type of export for organization
    :type export_type: str | None
    :param profile_id: Optional profile ID for scoped storage
    :type profile_id: str | None
    :return: Path to downloaded file if successful, None if not ready
    :rtype: Path | None
    """
    status = export_response.get("status", "UNKNOWN")
    export_id = export_response.get("exportId", "unknown")

    if status == "COMPLETED":
        url = export_response.get("url")
        if not url:
            logger.warning(f"Export {export_id} completed but no URL provided")
            return None
        try:
            return await self.download_export(
                export_url=url,
                export_id=export_id,
                export_type=export_type,
                metadata=export_response,
                profile_id=profile_id,
            )
        except Exception as e:
            logger.error(f"Failed to download export {export_id}: {e}")
            return None

    if status == "PROCESSING":
        logger.info(f"Export {export_id} is still processing")
    elif status == "FAILED":
        error = export_response.get("error", {})
        logger.error(f"Export {export_id} failed: {error}")
    else:
        logger.warning(f"Export {export_id} has unknown status: {status}")
    return None
|
|
479
|
+
|
|
480
|
+
def list_downloads(
|
|
481
|
+
self,
|
|
482
|
+
resource_type: str | None = None,
|
|
483
|
+
profile_id: str | None = None,
|
|
484
|
+
) -> list[dict[str, Any]]:
|
|
485
|
+
"""List all downloaded files.
|
|
486
|
+
|
|
487
|
+
Scans the download directory structure and returns information about
|
|
488
|
+
all downloaded files, optionally filtered by resource type and/or profile.
|
|
489
|
+
|
|
490
|
+
When profile_id is provided:
|
|
491
|
+
- Lists files only from data/profiles/{profile_id}/
|
|
492
|
+
|
|
493
|
+
When profile_id is None:
|
|
494
|
+
- Lists files only from legacy (non-profile) directories
|
|
495
|
+
- Excludes the profiles/ directory
|
|
496
|
+
|
|
497
|
+
:param resource_type: Optional filter by resource type
|
|
498
|
+
:type resource_type: str | None
|
|
499
|
+
:param profile_id: Optional profile ID for scoped listing
|
|
500
|
+
:type profile_id: str | None
|
|
501
|
+
:return: List of file information dictionaries
|
|
502
|
+
:rtype: list[dict[str, Any]]
|
|
503
|
+
"""
|
|
504
|
+
files = []
|
|
505
|
+
|
|
506
|
+
# Determine base directory based on profile scoping
|
|
507
|
+
if profile_id:
|
|
508
|
+
base = self.base_dir / "profiles" / profile_id
|
|
509
|
+
if not base.exists():
|
|
510
|
+
return []
|
|
511
|
+
else:
|
|
512
|
+
# Legacy mode: list from base_dir, but exclude profiles/
|
|
513
|
+
base = self.base_dir
|
|
514
|
+
|
|
515
|
+
# Determine search paths
|
|
516
|
+
if resource_type:
|
|
517
|
+
search_paths = [base / resource_type]
|
|
518
|
+
else:
|
|
519
|
+
search_paths = [
|
|
520
|
+
p for p in base.iterdir()
|
|
521
|
+
if p.is_dir() and p.name != "profiles" # Exclude profiles dir in legacy
|
|
522
|
+
]
|
|
523
|
+
|
|
524
|
+
for resource_dir in search_paths:
|
|
525
|
+
if not resource_dir.exists():
|
|
526
|
+
continue
|
|
527
|
+
|
|
528
|
+
# Recursively find all files
|
|
529
|
+
for file_path in resource_dir.rglob("*"):
|
|
530
|
+
if file_path.is_file() and not file_path.name.endswith(".meta.json"):
|
|
531
|
+
stat = file_path.stat()
|
|
532
|
+
relative_path = file_path.relative_to(base)
|
|
533
|
+
files.append(
|
|
534
|
+
{
|
|
535
|
+
"name": file_path.name,
|
|
536
|
+
"path": str(relative_path),
|
|
537
|
+
"full_path": str(file_path),
|
|
538
|
+
"size": stat.st_size,
|
|
539
|
+
"modified": datetime.fromtimestamp(
|
|
540
|
+
stat.st_mtime
|
|
541
|
+
).isoformat(),
|
|
542
|
+
"has_metadata": file_path.with_suffix(".meta.json").exists(),
|
|
543
|
+
}
|
|
544
|
+
)
|
|
545
|
+
|
|
546
|
+
return files
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
# Module-level singleton; created lazily by get_download_handler().
_download_handler: ExportDownloadHandler | None = None
|
|
551
|
+
|
|
552
|
+
|
|
553
|
+
def get_download_handler(
    base_dir: Path | None = None,
) -> ExportDownloadHandler:
    """Get or create the global download handler.

    Lazily builds a module-wide singleton so all callers share one
    handler. On first creation the AMAZON_ADS_DOWNLOAD_DIR environment
    variable, when set, overrides any explicit ``base_dir`` argument;
    otherwise ``base_dir`` or ``./data`` is used. Subsequent calls
    return the cached instance and ignore ``base_dir``.

    :param base_dir: Base directory for downloads
    :type base_dir: Path | None
    :return: ExportDownloadHandler instance
    :rtype: ExportDownloadHandler
    """
    global _download_handler
    if _download_handler is not None:
        return _download_handler

    import os

    env_dir = os.environ.get("AMAZON_ADS_DOWNLOAD_DIR")
    if env_dir:
        chosen = Path(env_dir)
    else:
        chosen = base_dir if base_dir is not None else Path.cwd() / "data"

    _download_handler = ExportDownloadHandler(chosen)
    return _download_handler
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Header name resolution for Amazon Ads API specifications.
|
|
3
|
+
|
|
4
|
+
This module discovers and normalizes header names from OpenAPI specs,
|
|
5
|
+
providing consistent header naming across different API versions.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
from typing import Iterable, Optional, Set
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class HeaderNameResolver:
    """Resolver for discovering and normalizing API header names."""

    # Case-insensitive patterns matching known header-name spellings.
    _CLIENT_PAT = re.compile(
        r"(amazon[- ]advertising[- ]api[- ]clientid|amazon[- ]ads[- ]clientid|client[-_ ]id)$",
        re.I,
    )
    _SCOPE_PAT = re.compile(r"(amazon[- ]advertising[- ]api[- ]scope|scope)$", re.I)
    _ACCOUNT_PAT = re.compile(r"(amazon[- ]ads[- ]accountid|account[-_ ]id)$", re.I)

    def __init__(self) -> None:
        # Header spellings discovered so far, one bucket per role.
        self.client_header_names: Set[str] = set()
        self.scope_header_names: Set[str] = set()
        self.account_header_names: Set[str] = set()

    def add_from_spec(self, spec: dict) -> None:
        """Collect header parameter names declared in an OpenAPI spec."""
        components = spec.get("components") or {}
        for definition in (components.get("parameters") or {}).values():
            if not isinstance(definition, dict) or definition.get("in") != "header":
                continue
            header = definition.get("name", "")
            if not header:
                continue

            lowered = header.lower()
            # A single name may legitimately match more than one role.
            for pattern, bucket in (
                (self._CLIENT_PAT, self.client_header_names),
                (self._SCOPE_PAT, self.scope_header_names),
                (self._ACCOUNT_PAT, self.account_header_names),
            ):
                if pattern.search(lowered):
                    bucket.add(header)

    @staticmethod
    def _prefer(names: Iterable[str], fallbacks: Iterable[str]) -> Optional[str]:
        """
        Select the preferred header name from discovered names.

        Amazon-Advertising-API-* spellings win over other discovered
        names; with nothing discovered, the first fallback is used.
        """
        candidates = [n for n in dict.fromkeys(names) if n]
        if candidates:
            branded = [
                n for n in candidates
                if n.lower().startswith("amazon-advertising-api-")
            ]
            return (branded or candidates)[0]
        return next(iter(fallbacks), None)

    def prefer_client(self) -> Optional[str]:
        """Get the preferred client ID header name."""
        return self._prefer(
            self.client_header_names, ["Amazon-Advertising-API-ClientId"]
        )

    def prefer_scope(self) -> Optional[str]:
        """Get the preferred scope header name."""
        return self._prefer(self.scope_header_names, ["Amazon-Advertising-API-Scope"])

    def prefer_account(self) -> Optional[str]:
        """Get the preferred account ID header name."""
        return self._prefer(self.account_header_names, ["Amazon-Ads-AccountId"])
|