amazon-ads-mcp 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- amazon_ads_mcp/__init__.py +11 -0
- amazon_ads_mcp/auth/__init__.py +33 -0
- amazon_ads_mcp/auth/base.py +211 -0
- amazon_ads_mcp/auth/hooks.py +172 -0
- amazon_ads_mcp/auth/manager.py +791 -0
- amazon_ads_mcp/auth/oauth_state_store.py +277 -0
- amazon_ads_mcp/auth/providers/__init__.py +14 -0
- amazon_ads_mcp/auth/providers/direct.py +393 -0
- amazon_ads_mcp/auth/providers/example_auth0.py.example +216 -0
- amazon_ads_mcp/auth/providers/openbridge.py +512 -0
- amazon_ads_mcp/auth/registry.py +146 -0
- amazon_ads_mcp/auth/secure_token_store.py +297 -0
- amazon_ads_mcp/auth/token_store.py +723 -0
- amazon_ads_mcp/config/__init__.py +5 -0
- amazon_ads_mcp/config/sampling.py +111 -0
- amazon_ads_mcp/config/settings.py +366 -0
- amazon_ads_mcp/exceptions.py +314 -0
- amazon_ads_mcp/middleware/__init__.py +11 -0
- amazon_ads_mcp/middleware/authentication.py +1474 -0
- amazon_ads_mcp/middleware/caching.py +177 -0
- amazon_ads_mcp/middleware/oauth.py +175 -0
- amazon_ads_mcp/middleware/sampling.py +112 -0
- amazon_ads_mcp/models/__init__.py +320 -0
- amazon_ads_mcp/models/amc_models.py +837 -0
- amazon_ads_mcp/models/api_responses.py +847 -0
- amazon_ads_mcp/models/base_models.py +215 -0
- amazon_ads_mcp/models/builtin_responses.py +496 -0
- amazon_ads_mcp/models/dsp_models.py +556 -0
- amazon_ads_mcp/models/stores_brands.py +610 -0
- amazon_ads_mcp/server/__init__.py +6 -0
- amazon_ads_mcp/server/__main__.py +6 -0
- amazon_ads_mcp/server/builtin_prompts.py +269 -0
- amazon_ads_mcp/server/builtin_tools.py +962 -0
- amazon_ads_mcp/server/file_routes.py +547 -0
- amazon_ads_mcp/server/html_templates.py +149 -0
- amazon_ads_mcp/server/mcp_server.py +327 -0
- amazon_ads_mcp/server/openapi_utils.py +158 -0
- amazon_ads_mcp/server/sampling_handler.py +251 -0
- amazon_ads_mcp/server/server_builder.py +751 -0
- amazon_ads_mcp/server/sidecar_loader.py +178 -0
- amazon_ads_mcp/server/transform_executor.py +827 -0
- amazon_ads_mcp/tools/__init__.py +22 -0
- amazon_ads_mcp/tools/cache_management.py +105 -0
- amazon_ads_mcp/tools/download_tools.py +267 -0
- amazon_ads_mcp/tools/identity.py +236 -0
- amazon_ads_mcp/tools/oauth.py +598 -0
- amazon_ads_mcp/tools/profile.py +150 -0
- amazon_ads_mcp/tools/profile_listing.py +285 -0
- amazon_ads_mcp/tools/region.py +320 -0
- amazon_ads_mcp/tools/region_identity.py +175 -0
- amazon_ads_mcp/utils/__init__.py +6 -0
- amazon_ads_mcp/utils/async_compat.py +215 -0
- amazon_ads_mcp/utils/errors.py +452 -0
- amazon_ads_mcp/utils/export_content_type_resolver.py +249 -0
- amazon_ads_mcp/utils/export_download_handler.py +579 -0
- amazon_ads_mcp/utils/header_resolver.py +81 -0
- amazon_ads_mcp/utils/http/__init__.py +56 -0
- amazon_ads_mcp/utils/http/circuit_breaker.py +127 -0
- amazon_ads_mcp/utils/http/client_manager.py +329 -0
- amazon_ads_mcp/utils/http/request.py +207 -0
- amazon_ads_mcp/utils/http/resilience.py +512 -0
- amazon_ads_mcp/utils/http/resilient_client.py +195 -0
- amazon_ads_mcp/utils/http/retry.py +76 -0
- amazon_ads_mcp/utils/http_client.py +873 -0
- amazon_ads_mcp/utils/media/__init__.py +21 -0
- amazon_ads_mcp/utils/media/negotiator.py +243 -0
- amazon_ads_mcp/utils/media/types.py +199 -0
- amazon_ads_mcp/utils/openapi/__init__.py +16 -0
- amazon_ads_mcp/utils/openapi/json.py +55 -0
- amazon_ads_mcp/utils/openapi/loader.py +263 -0
- amazon_ads_mcp/utils/openapi/refs.py +46 -0
- amazon_ads_mcp/utils/region_config.py +200 -0
- amazon_ads_mcp/utils/response_wrapper.py +171 -0
- amazon_ads_mcp/utils/sampling_helpers.py +156 -0
- amazon_ads_mcp/utils/sampling_wrapper.py +173 -0
- amazon_ads_mcp/utils/security.py +630 -0
- amazon_ads_mcp/utils/tool_naming.py +137 -0
- amazon_ads_mcp-0.2.7.dist-info/METADATA +664 -0
- amazon_ads_mcp-0.2.7.dist-info/RECORD +82 -0
- amazon_ads_mcp-0.2.7.dist-info/WHEEL +4 -0
- amazon_ads_mcp-0.2.7.dist-info/entry_points.txt +3 -0
- amazon_ads_mcp-0.2.7.dist-info/licenses/LICENSE +21 -0
amazon_ads_mcp/server/transform_executor.py (new file)

@@ -0,0 +1,827 @@

```python
"""Transform executor for declarative tool transformations.

This module provides the DeclarativeTransformExecutor class that handles
executing declarative transform rules from sidecar files. It supports
input/output transformations, call transformations with pagination and
batching, and various data coercion operations.

The executor is designed to work with FastMCP servers and provides
safe fallbacks when transformations fail.
"""

import json
import logging
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional

from ..utils.async_compat import install_compatibility_policy

# Install compatibility policy if needed (no monkey-patching)
install_compatibility_policy()


class DeclarativeTransformExecutor:
    """Execute declarative transform rules from sidecars.

    This minimal executor supports:
    - parse_payload: json_or_yaml
    - apply_preset: (stub hook)
    - coerce: enum_case, date_yyyy_mm_dd (stub hooks)
    - compose: variable substitution "$var" within dict structures
    """

    def __init__(self, namespace: str, rules: Dict[str, Any]):
        """Initialize the transform executor with namespace and rules.

        :param namespace: Namespace identifier for the executor
        :type namespace: str
        :param rules: Dictionary containing transform rules and configuration
        :type rules: Dict[str, Any]
        """
        self.namespace = namespace
        self.rules = rules or {}
        self.version = rules.get("version", "1.0")
        self._preset_cache: Dict[str, Any] = {}
        self._logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
```
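For orientation, a minimal construction sketch. The import path follows the package layout in the file list above; the rule contents are an assumption (real rule sets come from sidecar files loaded by `sidecar_loader.py`), and only `"version"` is read directly by `__init__`:

```python
# Hypothetical usage sketch -- everything beyond "version" is consumed
# per-rule by the methods below.
from amazon_ads_mcp.server.transform_executor import DeclarativeTransformExecutor

executor = DeclarativeTransformExecutor(
    namespace="amc",           # used for preset paths and artifact file names
    rules={"version": "1.0"},  # sidecar-provided rules (assumed shape)
)
```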
```python
    def create_input_transform(
        self, rule: Dict[str, Any]
    ) -> Optional[Callable[[Dict[str, Any]], Any]]:
        """Create an input transformation function from rule configuration.

        Generates an async function that applies various input transformations
        including payload parsing, preset application, data coercion, and
        default value injection.

        :param rule: Rule configuration dictionary containing input_transform
            settings
        :type rule: Dict[str, Any]
        :return: Async transformation function or None if no input transform
            is configured
        :rtype: Optional[Callable[[Dict[str, Any]], Any]]
        """
        cfg = rule.get("input_transform")
        if not cfg:
            return None

        async def _transform(args: Dict[str, Any]) -> Dict[str, Any]:
            try:
                a = dict(args or {})

                # parse payload if requested
                if cfg.get("parse_payload") == "json_or_yaml" and "payload" in a:
                    a["payload"] = self._parse_flexible(a.get("payload"))

                # apply presets
                if cfg.get("apply_preset") and a.get("preset_id"):
                    a["payload"] = await self._apply_preset(
                        a.get("payload"), a.get("preset_id")
                    )

                # coercions
                a = self._apply_coercions(a, cfg.get("coerce", []))

                # defaults: relative time (e.g., set minCreationTime if missing)
                defaults = cfg.get("defaults") if isinstance(cfg, dict) else None
                if isinstance(defaults, dict):
                    rel = defaults.get("relative_time")
                    if isinstance(rel, dict):
                        import datetime as _dt

                        for key, spec in rel.items():
                            try:
                                if key not in a or a.get(key) in (None, ""):
                                    days = int((spec or {}).get("days_ago", 30))
                                    base = _dt.datetime.now(_dt.timezone.utc)
                                    target = base - _dt.timedelta(days=days)
                                    epoch = _dt.datetime(
                                        1970, 1, 1, tzinfo=_dt.timezone.utc
                                    )
                                    a[key] = int(
                                        (target - epoch).total_seconds() * 1000
                                    )
                            except Exception:
                                continue

                # require_any_of: ensure at least one of param groups is present
                require_any = (
                    cfg.get("require_any_of") if isinstance(cfg, dict) else None
                )
                if isinstance(require_any, list):
                    for group in require_any:
                        if isinstance(group, (list, tuple)):
                            if not any(
                                (g in a and a.get(g) not in (None, "")) for g in group
                            ):
                                # If none present, leave defaults to satisfy; otherwise pass-through
                                pass

                # compose final request dict
                composed = cfg.get("compose")
                if isinstance(composed, dict):
                    return self._compose_structure(composed, a)
                return a
            except Exception:
                # Fail-safe: return original args to avoid breaking calls
                return dict(args or {})

        return _transform
```
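A usage sketch for the input transform, continuing the construction above. The rule keys (`parse_payload`, `defaults.relative_time`) are the ones read by `_transform`; the field name `minCreationTime` is illustrative:

```python
import asyncio

rule = {
    "input_transform": {
        "parse_payload": "json_or_yaml",
        "defaults": {"relative_time": {"minCreationTime": {"days_ago": 7}}},
    }
}
transform = executor.create_input_transform(rule)

args = {"payload": '{"name": "example"}'}
result = asyncio.run(transform(args))
# result["payload"] is now the parsed dict {"name": "example"}, and
# result["minCreationTime"] is an epoch-ms int from roughly seven days ago.
```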
```python
    def create_output_transform(
        self, rule: Dict[str, Any]
    ) -> Optional[Callable[[Any], Any]]:
        """Create an output transformation function from rule configuration.

        Generates an async function that applies output transformations
        including projection, sampling, summary wrapping, and artifact
        threshold handling.

        :param rule: Rule configuration dictionary containing output_transform
            settings
        :type rule: Dict[str, Any]
        :return: Async transformation function or None if no output transform
            is configured
        :rtype: Optional[Callable[[Any], Any]]
        """
        cfg = rule.get("output_transform")
        if not cfg:
            return None

        async def _transform(resp: Any) -> Any:
            try:
                out = resp
                # Projection
                projection = cfg.get("projection") if isinstance(cfg, dict) else None
                if projection and isinstance(out, dict):
                    out = {k: out.get(k) for k in projection}

                # Sampling of list fields
                sample_n = cfg.get("sample_n") if isinstance(cfg, dict) else None
                if isinstance(sample_n, int) and sample_n > 0:
                    out = self._truncate_lists(out, sample_n)

                # Summary wrapper
                summary = cfg.get("summary") if isinstance(cfg, dict) else None
                if summary and isinstance(out, dict):
                    out = {
                        "summary": {k: out.get(k) for k in summary},
                        "data": out,
                    }

                # Artifact threshold
                thresh = (
                    cfg.get("artifact_threshold_bytes")
                    if isinstance(cfg, dict)
                    else None
                )
                if isinstance(thresh, int) and thresh > 0:
                    try:
                        s = json.dumps(out, ensure_ascii=False)
                        size = len(s.encode("utf-8"))
                        if size > thresh:
                            base = Path.cwd() / "data" / "amc"
                            base.mkdir(parents=True, exist_ok=True)
                            from time import time

                            fpath = (
                                base / f"artifact_{self.namespace}_{int(time())}.json"
                            )
                            fpath.write_text(s, encoding="utf-8")
                            return {
                                "artifact_path": str(fpath),
                                "size_bytes": size,
                                "truncated": True,
                            }
                    except Exception:
                        pass

                return out
            except Exception:
                return resp

        return _transform
```
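A sketch of the output side under the same setup; the response shape is invented for illustration:

```python
import asyncio

rule = {
    "output_transform": {
        "projection": ["items", "nextToken"],  # keep only these top-level keys
        "sample_n": 2,                         # cap every list at two items
    }
}
out_transform = executor.create_output_transform(rule)

resp = {"items": [1, 2, 3, 4], "nextToken": None, "debug": {"trace": "..."}}
shaped = asyncio.run(out_transform(resp))
# shaped == {"items": [1, 2], "nextToken": None}
```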
```python
    def create_call_transform(
        self, rule: Dict[str, Any]
    ) -> Optional[Callable[..., Any]]:
        """Create a wrapper that can implement pagination and batching.

        Expected FastMCP call signature (subject to availability):
        async def call_transform(call_next, args: dict) -> any

        If unsupported in the current FastMCP build, sidecar_loader will skip.

        :param rule: Rule configuration dictionary containing pagination
            and batching settings
        :type rule: Dict[str, Any]
        :return: Async call transformation function or None if no call
            transform is needed
        :rtype: Optional[Callable[..., Any]]
        """
        pagination = rule.get("pagination") if isinstance(rule, dict) else None
        batch = rule.get("batch") if isinstance(rule, dict) else None
        output_cfg = rule.get("output_transform") if isinstance(rule, dict) else None
        if not pagination and not batch:
            # Still allow call-level shaping based on args if output_cfg exists
            if not output_cfg:
                return None

        async def _call(
            call_next: Callable[[Dict[str, Any]], Any], args: Dict[str, Any]
        ):
            # Apply batching first if configured and payload is a list
            if batch and isinstance(batch, dict):
                size = int(batch.get("size", 0) or 0)
                path = batch.get("path") or "payload"
                if size > 0:
                    lst = self._get_by_path(args, path)
                    if isinstance(lst, list) and len(lst) > size:
                        results: List[Any] = []
                        batch_errors: List[Dict[str, Any]] = []
                        for i in range(0, len(lst), size):
                            chunk = lst[i : i + size]
                            chunk_args = dict(args)
                            self._set_by_path(chunk_args, path, chunk)
                            try:
                                res = await call_next(chunk_args)
                                results.append(res)
                            except Exception as e:
                                batch_errors.append(
                                    {"chunk": i // size, "error": str(e)}
                                )
                                continue
                        # Smart aggregation
                        if all(isinstance(r, dict) for r in results):
                            if all(isinstance(r.get("items"), list) for r in results):
                                merged = []
                                for r in results:
                                    merged.extend(r.get("items", []))
                                out = {"items": merged, "count": len(merged)}
                                if batch_errors:
                                    out["batch_errors"] = batch_errors
                                return out
                            if all("errors" in r for r in results):
                                errs: List[Any] = []
                                for r in results:
                                    errs.extend(r.get("errors", []))
                                out = {
                                    "errors": errs,
                                    "batches_processed": len(results),
                                }
                                if batch_errors:
                                    out["batch_errors"] = batch_errors
                                return out
                        out = {"batches": len(results), "results": results}
                        if batch_errors:
                            out["batch_errors"] = batch_errors
                        return out

            # Pagination handling
            if (
                pagination
                and isinstance(pagination, dict)
                and pagination.get("all_pages")
            ):
                param_name = pagination.get("param_name") or "nextToken"
                response_key = pagination.get("response_key") or "nextToken"
                limit_param = pagination.get("limit_param")
                # Seed args (do not mutate original)
                cur_args = dict(args)
                if (
                    limit_param
                    and "default_limit" in pagination
                    and limit_param not in cur_args
                ):
                    # put limit into params if present
                    # Assume top-level arg schema; sidecar input_transform can also inject
                    cur_args[limit_param] = pagination.get("default_limit")

                pages: List[Any] = []
                next_token = None
                page_count = 0
                max_pages = int(pagination.get("max_pages", 100) or 100)
                while True:
                    if next_token:
                        cur_args[param_name] = next_token
                    res = await call_next(cur_args)
                    pages.append(res)
                    # Extract next token from response
                    next_token = None
                    if isinstance(res, dict):
                        next_token = res.get(response_key)
                    page_count += 1
                    if not next_token or page_count >= max_pages:
                        break
                shaped = {"pages": page_count, "results": pages}
                # Apply optional output shaping on aggregated results
                if output_cfg:
                    shaped = self._shape_output(shaped, output_cfg, args)
                return shaped

            # Default: single call
            res = await call_next(args)
            if output_cfg:
                res = self._shape_output(res, output_cfg, args)
            return res

        return _call
```
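A pagination sketch with a stand-in `call_next` that serves three pages; the token key matches the `nextToken` default above, and the page contents are invented:

```python
import asyncio

rule = {"pagination": {"all_pages": True, "max_pages": 10}}
call = executor.create_call_transform(rule)

_pages = [
    {"items": [1], "nextToken": "t1"},
    {"items": [2], "nextToken": "t2"},
    {"items": [3]},  # no token -> loop stops
]

async def fake_call_next(args):
    # Pick the page addressed by the incoming token (None -> first page).
    index = {None: 0, "t1": 1, "t2": 2}[args.get("nextToken")]
    return _pages[index]

result = asyncio.run(call(fake_call_next, {}))
# result == {"pages": 3, "results": [<page 1>, <page 2>, <page 3>]}
```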
```python
    def _compose_structure(self, template: Any, args: Dict[str, Any]) -> Any:
        """Compose a structure using template and variable substitution.

        Recursively processes a template structure, replacing variables
        starting with '$' with values from the args dictionary.

        :param template: Template structure (dict, list, or scalar)
        :type template: Any
        :param args: Arguments dictionary for variable substitution
        :type args: Dict[str, Any]
        :return: Composed structure with variables substituted
        :rtype: Any
        """
        if isinstance(template, dict):
            out = {}
            for k, v in template.items():
                out[k] = self._compose_structure(v, args)
            return out
        if isinstance(template, list):
            return [self._compose_structure(x, args) for x in template]
        if isinstance(template, str) and template.startswith("$"):
            return args.get(template[1:])
        return template
```
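The `$var` substitution in action; the template keys are illustrative:

```python
template = {"workflow": {"sqlQuery": "$query"}, "limit": "$limit"}
composed = executor._compose_structure(
    template, {"query": "SELECT 1", "limit": 100}
)
# composed == {"workflow": {"sqlQuery": "SELECT 1"}, "limit": 100}
# Unknown variables resolve to None via args.get().
```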
```python
    def _apply_coercions(self, args: Dict[str, Any], kinds: Any) -> Dict[str, Any]:
        """Apply specified data type coercions to arguments.

        :param args: Arguments dictionary to apply coercions to
        :type args: Dict[str, Any]
        :param kinds: List of coercion types to apply
        :type kinds: Any
        :return: Arguments with coercions applied
        :rtype: Dict[str, Any]
        """
        if not kinds:
            return args
        data = dict(args)
        for kind in kinds:
            if kind == "enum_case":
                data = self._coerce_enum_case(data)
            elif kind == "date_yyyy_mm_dd":
                data = self._coerce_dates(data)
            elif kind == "number_to_string":
                data = self._coerce_numbers_to_strings(data)
            elif kind == "iso_to_epoch_ms":
                data = self._coerce_iso_to_epoch_ms(data)
        return data
```
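Chaining two of the coercions defined below (values only; dict keys are never touched):

```python
coerced = executor._apply_coercions(
    {"state": "enabled", "count": 3},
    ["enum_case", "number_to_string"],
)
# coerced == {"state": "ENABLED", "count": "3"}
```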
```python
    def _walk(self, obj: Any, fn: Callable[[Any], Any]) -> Any:
        """Recursively walk through a data structure applying a function.

        :param obj: Object to walk through (dict, list, or scalar)
        :type obj: Any
        :param fn: Function to apply to each scalar value
        :type fn: Callable[[Any], Any]
        :return: New structure with function applied to all scalar values
        :rtype: Any
        """
        if isinstance(obj, dict):
            return {k: self._walk(v, fn) for k, v in obj.items()}
        if isinstance(obj, list):
            return [self._walk(x, fn) for x in obj]
        return fn(obj)

    def _coerce_enum_case(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Convert string values to uppercase for enum-like fields.

        :param data: Data dictionary to process
        :type data: Dict[str, Any]
        :return: Data with string values converted to uppercase
        :rtype: Dict[str, Any]
        """

        def fn(v: Any) -> Any:
            if isinstance(v, str) and v.isalpha():
                return v.upper()
            return v

        return self._walk(data, fn)

    def _coerce_dates(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Convert various date formats to YYYY-MM-DD format.

        :param data: Data dictionary to process
        :type data: Dict[str, Any]
        :return: Data with date strings normalized to YYYY-MM-DD format
        :rtype: Dict[str, Any]
        """
        from datetime import datetime

        def fn(v: Any) -> Any:
            if isinstance(v, str):
                for fmt in (
                    "%Y-%m-%d",
                    "%Y/%m/%d",
                    "%m/%d/%Y",
                    "%Y-%m-%dT%H:%M:%S",
                    "%Y-%m-%dT%H:%M:%S.%fZ",
                ):
                    try:
                        dt = datetime.strptime(v, fmt)
                        return dt.strftime("%Y-%m-%d")
                    except Exception:
                        continue
            return v

        return self._walk(data, fn)

    def _coerce_numbers_to_strings(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Convert numeric values to strings.

        :param data: Data dictionary to process
        :type data: Dict[str, Any]
        :return: Data with numeric values converted to strings
        :rtype: Dict[str, Any]
        """

        def fn(v: Any) -> Any:
            if isinstance(v, (int, float)):
                return str(v)
            return v

        return self._walk(data, fn)
```
```python
    def _coerce_iso_to_epoch_ms(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Convert ISO-like timestamps to epoch milliseconds for common time keys.

        This function targets specific time-related fields and converts
        various timestamp formats to epoch milliseconds for consistency.

        :param data: Data dictionary to process
        :type data: Dict[str, Any]
        :return: Data with time fields converted to epoch milliseconds
        :rtype: Dict[str, Any]
        """
        targets = {
            "minCreationTime",
            "maxCreationTime",
            "startTime",
            "endTime",
        }

        def to_epoch_ms(val: Any) -> Any:
            # Pass through if already int-like
            if isinstance(val, int):
                # Heuristic: assume seconds if < 10^12
                return val if val > 10**12 else val * 1000
            if isinstance(val, float):
                return int(val * 1000)
            if isinstance(val, str):
                s = val.strip()
                # Numeric string
                if s.isdigit():
                    n = int(s)
                    return n if n > 10**12 else n * 1000
                # Try ISO 8601
                try:
                    import datetime as _dt

                    iso = s
                    if iso.endswith("Z"):
                        iso = iso[:-1] + "+00:00"
                    # Date only → assume midnight UTC
                    if len(iso) == 10 and iso.count("-") == 2:
                        iso = iso + "T00:00:00+00:00"
                    # Compact date YYYYMMDD
                    if len(s) == 8 and s.isdigit():
                        iso = f"{s[0:4]}-{s[4:6]}-{s[6:8]}T00:00:00+00:00"
                    dt = _dt.datetime.fromisoformat(iso)
                    if dt.tzinfo is None:
                        dt = dt.replace(tzinfo=_dt.timezone.utc)
                    epoch = _dt.datetime(1970, 1, 1, tzinfo=_dt.timezone.utc)
                    return int((dt - epoch).total_seconds() * 1000)
                except Exception:
                    return val
            return val

        def walk(obj: Any) -> Any:
            if isinstance(obj, dict):
                out = {}
                for k, v in obj.items():
                    if k in targets:
                        out[k] = to_epoch_ms(v)
                    else:
                        out[k] = walk(v)
                return out
            if isinstance(obj, list):
                return [walk(x) for x in obj]
            return obj

        return walk(data)
```
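A quick check of the epoch-ms coercion; only keys in `targets` are converted, other fields pass through untouched:

```python
coerced = executor._coerce_iso_to_epoch_ms(
    {"minCreationTime": "2024-01-01", "name": "report"}
)
# coerced == {"minCreationTime": 1704067200000, "name": "report"}
```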
```python
    def _coerce_iso_to_amc(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Convert flexible timestamps to AMC ISO format 'YYYY-MM-DDTHH:MM:SS'.

        This function converts various timestamp formats to the AMC-specific
        ISO format for consistency across the system.

        :param data: Data dictionary to process
        :type data: Dict[str, Any]
        :return: Data with time fields converted to AMC ISO format
        :rtype: Dict[str, Any]
        """
        targets = {
            "minCreationTime",
            "maxCreationTime",
            "startTime",
            "endTime",
        }

        def to_iso(val: Any) -> Any:
            if isinstance(val, int):
                # assume ms if large
                n = val if val > 10**12 else val * 1000
                import datetime as _dt

                dt = _dt.datetime.fromtimestamp(n / 1000, tz=_dt.timezone.utc)
                return dt.strftime("%Y-%m-%dT%H:%M:%S")
            if isinstance(val, float):
                import datetime as _dt

                dt = _dt.datetime.fromtimestamp(val, tz=_dt.timezone.utc)
                return dt.strftime("%Y-%m-%dT%H:%M:%S")
            if isinstance(val, str):
                s = val.strip()
                if s.isdigit():
                    n = int(s)
                    if n < 10**12:
                        n *= 1000
                    import datetime as _dt

                    dt = _dt.datetime.fromtimestamp(n / 1000, tz=_dt.timezone.utc)
                    return dt.strftime("%Y-%m-%dT%H:%M:%S")
                try:
                    import datetime as _dt

                    iso = s
                    if iso.endswith("Z"):
                        iso = iso[:-1] + "+00:00"
                    if len(iso) == 10 and iso.count("-") == 2:
                        iso = iso + "T00:00:00+00:00"
                    if len(s) == 8 and s.isdigit():
                        iso = f"{s[0:4]}-{s[4:6]}-{s[6:8]}T00:00:00+00:00"
                    dt = _dt.datetime.fromisoformat(iso)
                    if dt.tzinfo is None:
                        dt = dt.replace(tzinfo=_dt.timezone.utc)
                    return dt.astimezone(_dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%S")
                except Exception:
                    return val
            return val

        def walk(obj: Any) -> Any:
            if isinstance(obj, dict):
                out = {}
                for k, v in obj.items():
                    if k in targets:
                        out[k] = to_iso(v)
                    else:
                        out[k] = walk(v)
                return out
            if isinstance(obj, list):
                return [walk(x) for x in obj]
            return obj

        return walk(data)
```
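And the reverse direction, normalizing an epoch-ms value to the AMC timestamp string:

```python
coerced = executor._coerce_iso_to_amc({"startTime": 1704067200000})
# coerced == {"startTime": "2024-01-01T00:00:00"}
```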
```python
    async def _apply_preset(self, payload: Any, preset_id: Optional[str]) -> Any:
        """Merge preset data into payload if preset file exists.

        Preset search path: config/presets/<namespace>/<preset_id>.(json|yaml|yml)

        :param payload: Original payload data
        :type payload: Any
        :param preset_id: Identifier for the preset to apply
        :type preset_id: Optional[str]
        :return: Payload with preset data merged in
        :rtype: Any
        """
        if not preset_id:
            return payload
        base = Path.cwd() / "config" / "presets" / self.namespace
        candidates = [
            base / f"{preset_id}.json",
            base / f"{preset_id}.yaml",
            base / f"{preset_id}.yml",
        ]
        preset_data: Any = None
        for p in candidates:
            if p.exists():
                try:
                    if p.suffix == ".json":
                        preset_data = json.loads(p.read_text(encoding="utf-8"))
                    else:
                        import yaml  # type: ignore

                        preset_data = yaml.safe_load(p.read_text(encoding="utf-8"))
                except Exception:
                    preset_data = None
                break
        if isinstance(preset_data, dict):
            if not self._validate_preset(preset_data, str(preset_id)):
                self._logger.warning("Preset %s failed validation", preset_id)
                return payload
        if isinstance(preset_data, dict) and isinstance(payload, dict):
            return self._deep_merge_dicts(preset_data, payload)
        return payload
```
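A preset sketch. Assuming a file `config/presets/amc/weekly_report.json` containing `{"workflow": {"timeWindow": "LAST_7_DAYS"}, "format": "CSV"}` (both the file and its fields are invented), the caller's payload wins on conflicts because it is merged in second:

```python
import asyncio

merged = asyncio.run(
    executor._apply_preset({"workflow": {"sqlQuery": "SELECT 1"}}, "weekly_report")
)
# merged == {
#     "workflow": {"timeWindow": "LAST_7_DAYS", "sqlQuery": "SELECT 1"},
#     "format": "CSV",
# }
```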
```python
    def _parse_flexible(self, payload: Any) -> Any:
        """Parse payload as JSON or YAML if it's a string.

        :param payload: Payload to parse
        :type payload: Any
        :return: Parsed payload or original if parsing fails
        :rtype: Any
        """
        if isinstance(payload, (dict, list)):
            return payload
        if isinstance(payload, str):
            try:
                return json.loads(payload)
            except Exception:
                try:
                    import yaml  # type: ignore

                    return yaml.safe_load(payload)
                except Exception:
                    return payload
        return payload
```
```python
    def _deep_merge_dicts(self, a: Dict[str, Any], b: Dict[str, Any]) -> Dict[str, Any]:
        """Deep merge dict b into a (without mutating inputs).

        :param a: Base dictionary
        :type a: Dict[str, Any]
        :param b: Dictionary to merge into base
        :type b: Dict[str, Any]
        :return: New merged dictionary
        :rtype: Dict[str, Any]
        """
        out: Dict[str, Any] = {}
        keys = set(a.keys()) | set(b.keys())
        for k in keys:
            va = a.get(k)
            vb = b.get(k)
            if isinstance(va, dict) and isinstance(vb, dict):
                out[k] = self._deep_merge_dicts(va, vb)
            elif vb is not None:
                out[k] = vb
            else:
                out[k] = va
        return out
```
```python
    def _get_by_path(self, obj: Dict[str, Any], path: str) -> Any:
        """Get a value from a nested dictionary using dot notation path.

        :param obj: Dictionary to traverse
        :type obj: Dict[str, Any]
        :param path: Dot-separated path to the target value
        :type path: str
        :return: Value at the specified path or None if not found
        :rtype: Any
        """
        cur: Any = obj
        for part in path.split("."):
            if isinstance(cur, dict):
                cur = cur.get(part)
            else:
                return None
        return cur

    def _set_by_path(self, obj: Dict[str, Any], path: str, value: Any) -> None:
        """Set a value in a nested dictionary using dot notation path.

        :param obj: Dictionary to modify
        :type obj: Dict[str, Any]
        :param path: Dot-separated path to the target location
        :type path: str
        :param value: Value to set at the specified path
        :type value: Any
        """
        parts = path.split(".")
        cur: Any = obj
        for p in parts[:-1]:
            if p not in cur or not isinstance(cur[p], dict):
                cur[p] = {}
            cur = cur[p]
        cur[parts[-1]] = value
```
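Dot-path access as used by the batching wrapper. Note that `_set_by_path` mutates in place, which is why `_call` copies `args` before chunking:

```python
args = {"body": {"payload": [1, 2, 3]}}
executor._get_by_path(args, "body.payload")       # -> [1, 2, 3]
executor._set_by_path(args, "body.payload", [1])  # mutates args
# args == {"body": {"payload": [1]}}
```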
```python
    def _validate_preset(self, preset_data: Dict[str, Any], preset_id: str) -> bool:
        """Basic preset validation hook.

        In absence of per-operation schemas, ensure it's a dict and non-empty.
        This can be extended to check required fields per operation.

        :param preset_data: Preset data to validate
        :type preset_data: Dict[str, Any]
        :param preset_id: Identifier for the preset being validated
        :type preset_id: str
        :return: True if preset is valid, False otherwise
        :rtype: bool
        """
        return isinstance(preset_data, dict) and len(preset_data) > 0
```
```python
    def _shape_output(self, out: Any, cfg: Dict[str, Any], args: Dict[str, Any]) -> Any:
        """Apply output shaping rules with optional arg overrides.

        :param out: Output data to shape
        :type out: Any
        :param cfg: Output transform configuration
        :type cfg: Dict[str, Any]
        :param args: Input arguments that may override configuration
        :type args: Dict[str, Any]
        :return: Shaped output data
        :rtype: Any
        """
        try:
            result = out
            # Arg overrides
            view = (args or {}).get("view")
            include_columns = bool((args or {}).get("include_columns"))
            user_sample = (args or {}).get("sample_n")

            # Projection
            projection = cfg.get("projection") if isinstance(cfg, dict) else None
            if view == "full" or include_columns:
                projection = None
            if projection and isinstance(result, dict):
                result = {k: result.get(k) for k in projection}

            # Sampling
            sample_n = None
            if isinstance(user_sample, int) and user_sample > 0:
                sample_n = user_sample
            elif isinstance(cfg.get("sample_n"), int) and cfg.get("sample_n") > 0:
                sample_n = cfg.get("sample_n")
            if sample_n:
                result = self._truncate_lists(result, sample_n)

            # Summary wrapper
            summary = cfg.get("summary") if isinstance(cfg, dict) else None
            if summary and isinstance(result, dict):
                result = {
                    "summary": {k: result.get(k) for k in summary},
                    "data": result,
                }

            # Artifact threshold
            thresh = (
                cfg.get("artifact_threshold_bytes") if isinstance(cfg, dict) else None
            )
            if isinstance(thresh, int) and thresh > 0:
                try:
                    s = json.dumps(result, ensure_ascii=False)
                    size = len(s.encode("utf-8"))
                    if size > thresh:
                        base = Path.cwd() / "data" / "amc"
                        base.mkdir(parents=True, exist_ok=True)
                        from time import time

                        fpath = base / f"artifact_{self.namespace}_{int(time())}.json"
                        fpath.write_text(s, encoding="utf-8")
                        return {
                            "artifact_path": str(fpath),
                            "size_bytes": size,
                            "truncated": True,
                        }
                except Exception:
                    pass
            return result
        except Exception:
            return out
```
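How the arg overrides interact with the config, using invented field names:

```python
cfg = {"projection": ["items"], "sample_n": 1}
resp = {"items": [1, 2, 3], "debug": "x"}

executor._shape_output(resp, cfg, {})
# -> {"items": [1]}                (projection, then list truncation)

executor._shape_output(resp, cfg, {"view": "full"})
# -> {"items": [1], "debug": "x"}  (projection skipped; sample_n still applies)
```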
```python
    def _truncate_lists(self, data: Any, n: int) -> Any:
        """Recursively truncate all lists in a structure to at most n items.

        This helps prevent extremely large responses from overwhelming
        the client context. Only list lengths are affected; scalars and
        dict keys are preserved. The function does not mutate the input.

        :param data: Arbitrary JSON-like structure (dict/list/scalars)
        :type data: Any
        :param n: Maximum number of items to keep in any list
        :type n: int
        :return: New structure with lists truncated to n items
        :rtype: Any
        """
        try:

            def walk(obj: Any) -> Any:
                if isinstance(obj, list):
                    return [walk(x) for x in obj[: max(0, n)]]
                if isinstance(obj, dict):
                    return {k: walk(v) for k, v in obj.items()}
                return obj

            return walk(data)
        except Exception:
            # Fail safe: return original if anything goes wrong
            return data
```
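Finally, the truncation helper on its own:

```python
executor._truncate_lists({"rows": list(range(100)), "meta": {"ids": [1, 2, 3]}}, 2)
# -> {"rows": [0, 1], "meta": {"ids": [1, 2]}}
```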