api-dock 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. {api_dock-0.2.0 → api_dock-0.2.2}/PKG-INFO +1 -1
  2. {api_dock-0.2.0 → api_dock-0.2.2}/api_dock/database_config.py +39 -2
  3. {api_dock-0.2.0 → api_dock-0.2.2}/api_dock/route_mapper.py +47 -7
  4. api_dock-0.2.2/api_dock/storage_auth.py +435 -0
  5. {api_dock-0.2.0 → api_dock-0.2.2}/api_dock.egg-info/PKG-INFO +1 -1
  6. {api_dock-0.2.0 → api_dock-0.2.2}/pyproject.toml +1 -1
  7. api_dock-0.2.0/api_dock/storage_auth.py +0 -273
  8. {api_dock-0.2.0 → api_dock-0.2.2}/LICENSE.md +0 -0
  9. {api_dock-0.2.0 → api_dock-0.2.2}/README.md +0 -0
  10. {api_dock-0.2.0 → api_dock-0.2.2}/api_dock/__init__.py +0 -0
  11. {api_dock-0.2.0 → api_dock-0.2.2}/api_dock/cli.py +0 -0
  12. {api_dock-0.2.0 → api_dock-0.2.2}/api_dock/config.py +0 -0
  13. {api_dock-0.2.0 → api_dock-0.2.2}/api_dock/config_discovery.py +0 -0
  14. {api_dock-0.2.0 → api_dock-0.2.2}/api_dock/fast_api.py +0 -0
  15. {api_dock-0.2.0 → api_dock-0.2.2}/api_dock/flask_api.py +0 -0
  16. {api_dock-0.2.0 → api_dock-0.2.2}/api_dock/sql_builder.py +0 -0
  17. {api_dock-0.2.0 → api_dock-0.2.2}/api_dock.egg-info/SOURCES.txt +0 -0
  18. {api_dock-0.2.0 → api_dock-0.2.2}/api_dock.egg-info/dependency_links.txt +0 -0
  19. {api_dock-0.2.0 → api_dock-0.2.2}/api_dock.egg-info/entry_points.txt +0 -0
  20. {api_dock-0.2.0 → api_dock-0.2.2}/api_dock.egg-info/requires.txt +0 -0
  21. {api_dock-0.2.0 → api_dock-0.2.2}/api_dock.egg-info/top_level.txt +0 -0
  22. {api_dock-0.2.0 → api_dock-0.2.2}/config/config.yaml +0 -0
  23. {api_dock-0.2.0 → api_dock-0.2.2}/config/databases/db_example.yaml +0 -0
  24. {api_dock-0.2.0 → api_dock-0.2.2}/config/remotes/remote_with_allowed_routes.yaml +0 -0
  25. {api_dock-0.2.0 → api_dock-0.2.2}/config/remotes/remote_with_custom_mapping.yaml +0 -0
  26. {api_dock-0.2.0 → api_dock-0.2.2}/config/remotes/remote_with_restrictions.yaml +0 -0
  27. {api_dock-0.2.0 → api_dock-0.2.2}/config/remotes/remote_with_wildcards.yaml +0 -0
  28. {api_dock-0.2.0 → api_dock-0.2.2}/setup.cfg +0 -0
  29. {api_dock-0.2.0 → api_dock-0.2.2}/tests/test_config_syntax.py +0 -0
  30. {api_dock-0.2.0 → api_dock-0.2.2}/tests/test_curl_fixes.py +0 -0
  31. {api_dock-0.2.0 → api_dock-0.2.2}/tests/test_restrictions.py +0 -0
  32. {api_dock-0.2.0 → api_dock-0.2.2}/tests/test_root_endpoint.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: api_dock
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: A flexible API gateway that allows you to proxy requests to multiple remote APIs and Databases
5
5
  Author-email: Brookie Guzder-Williams <bguzder-williams@berkeley.edu>
6
6
  License: BSd 3-clause
@@ -84,15 +84,52 @@ def get_database_names(config: Dict[str, Any]) -> List[str]:
84
84
  def get_table_definition(table_name: str, database_config: Dict[str, Any]) -> Optional[str]:
85
85
  """Get the file path for a table from database configuration.
86
86
 
87
+ Supports both string URIs and dict-based definitions:
88
+ - String format: "table_name: s3://bucket/file.parquet"
89
+ - Dict format: "table_name: {uri: s3://bucket/file.parquet, region: us-east-2}"
90
+
87
91
  Args:
88
92
  table_name: Name of the table.
89
93
  database_config: Database configuration dictionary.
90
94
 
91
95
  Returns:
92
- File path for the table, or None if not found.
96
+ File path/URI for the table, or None if not found.
93
97
  """
94
98
  tables = database_config.get("tables", {})
95
- return tables.get(table_name)
99
+ table_def = tables.get(table_name)
100
+
101
+ # Handle both string and dict formats
102
+ if isinstance(table_def, str):
103
+ return table_def
104
+ elif isinstance(table_def, dict):
105
+ return table_def.get("uri") or table_def.get("path")
106
+ else:
107
+ return None
108
+
109
+
110
+ def get_table_metadata(table_name: str, database_config: Dict[str, Any]) -> Dict[str, Any]:
111
+ """Get metadata for a table from database configuration.
112
+
113
+ Returns metadata like region, auth headers, etc. if table is defined as a dict.
114
+
115
+ Args:
116
+ table_name: Name of the table.
117
+ database_config: Database configuration dictionary.
118
+
119
+ Returns:
120
+ Dictionary containing table metadata (empty dict if table is string format).
121
+ Possible keys: region, auth_headers, method, etc.
122
+ """
123
+ tables = database_config.get("tables", {})
124
+ table_def = tables.get(table_name)
125
+
126
+ if isinstance(table_def, dict):
127
+ # Return all metadata except the URI/path itself
128
+ metadata = {k: v for k, v in table_def.items() if k not in ['uri', 'path']}
129
+ return metadata
130
+ else:
131
+ # String format has no metadata
132
+ return {}
96
133
 
97
134
 
98
135
  def get_named_query(query_name: str, database_config: Dict[str, Any]) -> Optional[str]:
@@ -17,7 +17,7 @@ from typing import Any, Dict, List, Optional, Tuple
17
17
  from api_dock.config import find_remote_config, find_route_mapping, get_database_names, get_remote_names, get_remote_versions, get_settings, is_route_allowed, is_versioned_remote, load_main_config, resolve_latest_version
18
18
  from api_dock.database_config import find_database_route, get_database_versions, is_versioned_database, load_database_config, resolve_latest_database_version
19
19
  from api_dock.sql_builder import build_sql_query, extract_path_parameters
20
- from api_dock.storage_auth import detect_required_backends, extract_table_uris, setup_storage_authentication
20
+ from api_dock.storage_auth import detect_required_backends, extract_table_metadata_by_backend, extract_table_uris, setup_storage_authentication
21
21
 
22
22
 
23
23
  #
@@ -340,22 +340,32 @@ class RouteMapper:
340
340
  table_uris = extract_table_uris(database_config)
341
341
  required_backends = detect_required_backends(table_uris)
342
342
 
343
+ # Extract table metadata (region, auth headers, etc.) grouped by backend
344
+ backend_metadata = extract_table_metadata_by_backend(database_config)
345
+
343
346
  # Setup authentication for each backend
344
347
  # This automatically discovers credentials from:
345
- # - S3: AWS env vars, config files, IAM roles, SSO
348
+ # - S3: AWS env vars, config files, IAM roles, SSO (+ region from config or env)
346
349
  # - GCS: GCS env vars, service account files, HMAC keys
347
350
  # - Azure: Azure env vars, managed identity, CLI credentials
348
- # - HTTP/HTTPS: httpfs extension (custom headers can be added if needed)
351
+ # - HTTP/HTTPS: httpfs extension (+ custom headers/bearer token from config)
349
352
  #
353
+ # Metadata from config takes precedence over environment variables
350
354
  # Note: Authentication setup failures are graceful - public files will still work
351
- auth_results = setup_storage_authentication(conn, required_backends)
355
+ auth_results = setup_storage_authentication(conn, required_backends, backend_metadata)
352
356
 
353
357
  result = conn.execute(sql_query).fetchall()
354
358
  columns = [desc[0] for desc in conn.description] if conn.description else []
355
359
  conn.close()
356
360
 
357
- # Convert to list of dictionaries
358
- response_data = [dict(zip(columns, row)) for row in result]
361
+ # Convert to list of dictionaries with JSON-safe values
362
+ response_data = []
363
+ for row in result:
364
+ row_dict = {}
365
+ for col, val in zip(columns, row):
366
+ # Convert non-JSON-serializable types to strings
367
+ row_dict[col] = _make_json_safe(val)
368
+ response_data.append(row_dict)
359
369
 
360
370
  return (True, response_data, 200, None)
361
371
 
@@ -475,4 +485,34 @@ class RouteMapper:
475
485
 
476
486
  #
477
487
  # INTERNAL
478
- #
488
+ #
489
+ def _make_json_safe(value: Any) -> Any:
490
+ """Convert non-JSON-serializable values to JSON-safe types.
491
+
492
+ Handles datetime objects, dates, decimals, and other common types
493
+ that DuckDB returns but aren't directly JSON serializable.
494
+
495
+ Args:
496
+ value: Value to convert.
497
+
498
+ Returns:
499
+ JSON-safe version of the value.
500
+ """
501
+ from datetime import date, datetime
502
+ from decimal import Decimal
503
+
504
+ if value is None:
505
+ return None
506
+ elif isinstance(value, (datetime, date)):
507
+ # Convert datetime/date to ISO format string
508
+ return value.isoformat()
509
+ elif isinstance(value, Decimal):
510
+ # Convert Decimal to float
511
+ return float(value)
512
+ elif isinstance(value, bytes):
513
+ # Convert bytes to base64 string
514
+ import base64
515
+ return base64.b64encode(value).decode('utf-8')
516
+ else:
517
+ # Return as-is for JSON-safe types (str, int, float, bool, list, dict)
518
+ return value
@@ -0,0 +1,435 @@
1
+ """
2
+
3
+ Storage Authentication Module for API Dock
4
+
5
+ Handles authentication setup for various cloud storage backends (AWS S3, GCS, Azure, HTTP/HTTPS)
6
+ in DuckDB queries. Supports both public and private files with credential chain authentication.
7
+
8
+ License: BSD 3-Clause
9
+
10
+ """
11
+
12
+ #
13
+ # IMPORTS
14
+ #
15
+ import re
16
+ from typing import Any, Dict, List, Optional, Set
17
+
18
+
19
+ #
20
+ # CONSTANTS
21
+ #
22
+ # Storage backend detection patterns
23
+ S3_PATTERN = re.compile(r'^s3[a]?://', re.IGNORECASE)
24
+ GCS_PATTERN = re.compile(r'^gs://', re.IGNORECASE)
25
+ AZURE_PATTERN = re.compile(r'^az[ure]*://', re.IGNORECASE)
26
+ HTTP_PATTERN = re.compile(r'^https?://', re.IGNORECASE)
27
+
28
+ # Storage backend types
29
+ BACKEND_S3 = 's3'
30
+ BACKEND_GCS = 'gcs'
31
+ BACKEND_AZURE = 'azure'
32
+ BACKEND_HTTP = 'http'
33
+ BACKEND_LOCAL = 'local'
34
+
35
+
36
+ #
37
+ # PUBLIC
38
+ #
39
+ def detect_storage_backend(uri: str) -> str:
40
+ """Detect the storage backend from a URI.
41
+
42
+ Args:
43
+ uri: File URI or path (e.g., "s3://bucket/file", "gs://bucket/file", "/path/to/file")
44
+
45
+ Returns:
46
+ Storage backend type: 's3', 'gcs', 'azure', 'http', or 'local'
47
+ """
48
+ if S3_PATTERN.match(uri):
49
+ return BACKEND_S3
50
+ elif GCS_PATTERN.match(uri):
51
+ return BACKEND_GCS
52
+ elif AZURE_PATTERN.match(uri):
53
+ return BACKEND_AZURE
54
+ elif HTTP_PATTERN.match(uri):
55
+ return BACKEND_HTTP
56
+ else:
57
+ return BACKEND_LOCAL
58
+
59
+
60
+ def extract_table_uris(database_config: Dict[str, Any]) -> List[str]:
61
+ """Extract all table URIs from database configuration.
62
+
63
+ Supports both string and dict table definitions:
64
+ - String: "table: s3://bucket/file.parquet"
65
+ - Dict: "table: {uri: s3://bucket/file.parquet, region: us-east-2}"
66
+
67
+ Args:
68
+ database_config: Database configuration dictionary with 'tables' section.
69
+
70
+ Returns:
71
+ List of table URIs/paths.
72
+ """
73
+ tables = database_config.get('tables', {})
74
+ uris = []
75
+
76
+ for table_def in tables.values():
77
+ if isinstance(table_def, str):
78
+ uris.append(table_def)
79
+ elif isinstance(table_def, dict):
80
+ uri = table_def.get('uri') or table_def.get('path')
81
+ if uri:
82
+ uris.append(uri)
83
+
84
+ return uris
85
+
86
+
87
+ def extract_table_metadata_by_backend(database_config: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
88
+ """Extract table metadata grouped by storage backend.
89
+
90
+ Args:
91
+ database_config: Database configuration dictionary with 'tables' section.
92
+
93
+ Returns:
94
+ Dictionary mapping backend types to their aggregated metadata.
95
+ Example: {'s3': {'region': 'us-east-2'}, 'http': {'auth_headers': {...}}}
96
+ """
97
+ tables = database_config.get('tables', {})
98
+ backend_metadata = {}
99
+
100
+ for table_def in tables.values():
101
+ # Get URI and metadata
102
+ if isinstance(table_def, str):
103
+ uri = table_def
104
+ metadata = {}
105
+ elif isinstance(table_def, dict):
106
+ uri = table_def.get('uri') or table_def.get('path')
107
+ metadata = {k: v for k, v in table_def.items() if k not in ['uri', 'path']}
108
+ else:
109
+ continue
110
+
111
+ if not uri:
112
+ continue
113
+
114
+ # Detect backend and store metadata
115
+ backend = detect_storage_backend(uri)
116
+
117
+ if backend not in backend_metadata:
118
+ backend_metadata[backend] = {}
119
+
120
+ # Merge metadata (later tables can override earlier ones)
121
+ backend_metadata[backend].update(metadata)
122
+
123
+ return backend_metadata
124
+
125
+
126
+ def detect_required_backends(table_uris: List[str]) -> Set[str]:
127
+ """Detect which storage backends are needed for a list of table URIs.
128
+
129
+ Args:
130
+ table_uris: List of table URIs/paths.
131
+
132
+ Returns:
133
+ Set of required backend types (e.g., {'s3', 'gcs', 'local'})
134
+ """
135
+ backends = set()
136
+ for uri in table_uris:
137
+ backend = detect_storage_backend(uri)
138
+ backends.add(backend)
139
+ return backends
140
+
141
+
142
+ def setup_storage_authentication(conn: Any, backends: Set[str], metadata: Optional[Dict[str, Dict[str, Any]]] = None) -> Dict[str, bool]:
143
+ """Setup authentication for required storage backends in DuckDB connection.
144
+
145
+ This function attempts to configure authentication for each required backend.
146
+ It gracefully handles failures, allowing queries to proceed with public files
147
+ or when credentials are not needed.
148
+
149
+ Supported backends:
150
+ - S3: Uses AWS credential chain (env vars, config files, IAM roles)
151
+ - GCS: Uses GCS credential chain (service account, HMAC keys)
152
+ - Azure: Uses Azure credential chain (env vars, managed identity)
153
+ - HTTP/HTTPS: Uses httpfs extension (supports public files)
154
+
155
+ Args:
156
+ conn: DuckDB connection object.
157
+ backends: Set of required backend types.
158
+ metadata: Optional dictionary mapping backend types to their configuration metadata.
159
+ Example: {'s3': {'region': 'us-east-2'}, 'http': {'auth_headers': {...}}}
160
+
161
+ Returns:
162
+ Dictionary mapping backend names to setup success status.
163
+ True means authentication was configured, False means it failed but
164
+ the query may still work with public files.
165
+ """
166
+ if metadata is None:
167
+ metadata = {}
168
+
169
+ results = {}
170
+
171
+ # Setup S3 authentication (AWS)
172
+ if BACKEND_S3 in backends:
173
+ s3_metadata = metadata.get(BACKEND_S3, {})
174
+ results[BACKEND_S3] = _setup_s3_auth(conn, s3_metadata)
175
+
176
+ # Setup GCS authentication (Google Cloud Storage)
177
+ if BACKEND_GCS in backends:
178
+ gcs_metadata = metadata.get(BACKEND_GCS, {})
179
+ results[BACKEND_GCS] = _setup_gcs_auth(conn, gcs_metadata)
180
+
181
+ # Setup Azure authentication
182
+ if BACKEND_AZURE in backends:
183
+ azure_metadata = metadata.get(BACKEND_AZURE, {})
184
+ results[BACKEND_AZURE] = _setup_azure_auth(conn, azure_metadata)
185
+
186
+ # Setup HTTP/HTTPS support
187
+ if BACKEND_HTTP in backends:
188
+ http_metadata = metadata.get(BACKEND_HTTP, {})
189
+ results[BACKEND_HTTP] = _setup_http_support(conn, http_metadata)
190
+
191
+ # Local files don't need authentication
192
+ if BACKEND_LOCAL in backends:
193
+ results[BACKEND_LOCAL] = True
194
+
195
+ return results
196
+
197
+
198
+ #
199
+ # INTERNAL
200
+ #
201
+ def _setup_s3_auth(conn: Any, metadata: Optional[Dict[str, Any]] = None) -> bool:
202
+ """Setup AWS S3 authentication using credential chain.
203
+
204
+ Attempts to configure S3 access using AWS credential chain which automatically
205
+ discovers credentials from environment variables, config files, IAM roles, etc.
206
+
207
+ Region configuration priority:
208
+ 1. metadata['region'] (from database config)
209
+ 2. AWS_DEFAULT_REGION or AWS_REGION environment variable
210
+ 3. None (DuckDB auto-detect, may cause 301 redirects)
211
+
212
+ Args:
213
+ conn: DuckDB connection object.
214
+ metadata: Optional metadata dict that may contain 'region' key.
215
+
216
+ Returns:
217
+ True if setup succeeded, False if it failed (but query may still work with public files).
218
+ """
219
+ try:
220
+ import os
221
+
222
+ if metadata is None:
223
+ metadata = {}
224
+
225
+ conn.execute("INSTALL aws;")
226
+ conn.execute("LOAD aws;")
227
+
228
+ # Determine AWS region with priority:
229
+ # 1. Config file metadata (most specific)
230
+ # 2. Environment variables
231
+ # 3. None (auto-detect)
232
+ aws_region = (
233
+ metadata.get('region') or
234
+ os.environ.get('AWS_DEFAULT_REGION') or
235
+ os.environ.get('AWS_REGION')
236
+ )
237
+
238
+ # Configure S3 authentication using AWS credential chain
239
+ # This automatically discovers credentials from:
240
+ # - Environment variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN)
241
+ # - AWS config files (~/.aws/credentials, ~/.aws/config)
242
+ # - IAM roles (EC2, ECS, EKS, Lambda)
243
+ # - SSO credentials
244
+ # - Other AWS SDK credential providers
245
+ if aws_region:
246
+ # If region is specified, include it in the secret
247
+ conn.execute(f"""
248
+ CREATE OR REPLACE SECRET (
249
+ TYPE s3,
250
+ PROVIDER credential_chain,
251
+ REGION '{aws_region}'
252
+ );
253
+ """)
254
+ else:
255
+ # No region specified, let DuckDB auto-detect
256
+ # Note: This may cause 301 redirects if bucket is in a different region
257
+ conn.execute("""
258
+ CREATE OR REPLACE SECRET (
259
+ TYPE s3,
260
+ PROVIDER credential_chain
261
+ );
262
+ """)
263
+ return True
264
+ except Exception:
265
+ # Authentication setup failed, but public S3 files may still work
266
+ return False
267
+
268
+
269
+ def _setup_gcs_auth(conn: Any, metadata: Optional[Dict[str, Any]] = None) -> bool:
270
+ """Setup GCS authentication using credential chain.
271
+
272
+ Attempts to configure GCS access using credential chain which automatically
273
+ discovers credentials from environment variables, service account files, etc.
274
+
275
+ Supports metadata for advanced configuration:
276
+ - service_account: Path to service account JSON file (overrides GOOGLE_APPLICATION_CREDENTIALS)
277
+ - key_id: HMAC access key ID (overrides GCS_ACCESS_KEY_ID)
278
+ - secret: HMAC secret key (overrides GCS_SECRET_ACCESS_KEY)
279
+ - endpoint: Custom endpoint for GCS-compatible storage
280
+
281
+ Args:
282
+ conn: DuckDB connection object.
283
+ metadata: Optional metadata dict with GCS-specific configuration.
284
+
285
+ Returns:
286
+ True if setup succeeded, False if it failed (but query may still work with public files).
287
+ """
288
+ try:
289
+ import os
290
+
291
+ if metadata is None:
292
+ metadata = {}
293
+
294
+ # Install httpfs extension (required for GCS)
295
+ conn.execute("INSTALL httpfs;")
296
+ conn.execute("LOAD httpfs;")
297
+
298
+ # Check if explicit credentials are provided in metadata
299
+ key_id = metadata.get('key_id')
300
+ secret = metadata.get('secret')
301
+ service_account = metadata.get('service_account')
302
+ endpoint = metadata.get('endpoint')
303
+
304
+ # Priority for service account:
305
+ # 1. Metadata service_account path
306
+ # 2. GOOGLE_APPLICATION_CREDENTIALS env var
307
+ if service_account:
308
+ # Set environment variable for this session
309
+ os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = service_account
310
+
311
+ # Configure GCS authentication
312
+ if key_id and secret:
313
+ # Use explicit HMAC credentials from config
314
+ secret_parts = [
315
+ "TYPE gcs",
316
+ f"KEY_ID '{key_id}'",
317
+ f"SECRET '{secret}'"
318
+ ]
319
+
320
+ if endpoint:
321
+ secret_parts.append(f"ENDPOINT '{endpoint}'")
322
+
323
+ secret_sql = f"CREATE OR REPLACE SECRET ({', '.join(secret_parts)});"
324
+ conn.execute(secret_sql)
325
+ else:
326
+ # Use credential chain (environment variables, service account, etc.)
327
+ # This automatically discovers credentials from:
328
+ # - Environment variables (GCS_ACCESS_KEY_ID, GCS_SECRET_ACCESS_KEY)
329
+ # - Service account files (GOOGLE_APPLICATION_CREDENTIALS)
330
+ # - HMAC keys from GCS settings
331
+ conn.execute("""
332
+ CREATE OR REPLACE SECRET (
333
+ TYPE gcs,
334
+ PROVIDER credential_chain
335
+ );
336
+ """)
337
+ return True
338
+ except Exception:
339
+ # Authentication setup failed, but public GCS files may still work
340
+ return False
341
+
342
+
343
+ def _setup_azure_auth(conn: Any, metadata: Optional[Dict[str, Any]] = None) -> bool:
344
+ """Setup Azure Blob Storage authentication using credential chain.
345
+
346
+ Attempts to configure Azure access using credential chain which automatically
347
+ discovers credentials from environment variables, managed identity, etc.
348
+
349
+ Args:
350
+ conn: DuckDB connection object.
351
+ metadata: Optional metadata dict (currently unused for Azure, reserved for future).
352
+
353
+ Returns:
354
+ True if setup succeeded, False if it failed (but query may still work with public files).
355
+ """
356
+ try:
357
+ if metadata is None:
358
+ metadata = {}
359
+
360
+ conn.execute("INSTALL azure;")
361
+ conn.execute("LOAD azure;")
362
+
363
+ # Configure Azure authentication using credential chain
364
+ # This automatically discovers credentials from:
365
+ # - Environment variables (AZURE_STORAGE_CONNECTION_STRING, AZURE_STORAGE_ACCOUNT, etc.)
366
+ # - Managed Identity (when running on Azure)
367
+ # - Azure CLI credentials
368
+ conn.execute("""
369
+ CREATE OR REPLACE SECRET (
370
+ TYPE azure,
371
+ PROVIDER credential_chain
372
+ );
373
+ """)
374
+ return True
375
+ except Exception:
376
+ # Authentication setup failed, but public Azure files may still work
377
+ return False
378
+
379
+
380
+ def _setup_http_support(conn: Any, metadata: Optional[Dict[str, Any]] = None) -> bool:
381
+ """Setup HTTP/HTTPS support.
382
+
383
+ Installs httpfs extension for HTTP/HTTPS access. If metadata contains
384
+ auth_headers, bearer_token, or cookies, configures HTTP authentication.
385
+
386
+ Args:
387
+ conn: DuckDB connection object.
388
+ metadata: Optional metadata dict that may contain:
389
+ - bearer_token: Bearer token for Authorization header
390
+ - auth_headers: Dict of custom HTTP headers
391
+ - cookies: Dict of cookies to send with requests
392
+
393
+ Returns:
394
+ True if setup succeeded, False if it failed.
395
+ """
396
+ try:
397
+ if metadata is None:
398
+ metadata = {}
399
+
400
+ # Install httpfs extension (supports HTTP/HTTPS)
401
+ conn.execute("INSTALL httpfs;")
402
+ conn.execute("LOAD httpfs;")
403
+
404
+ # Setup HTTP authentication if provided
405
+ bearer_token = metadata.get('bearer_token')
406
+ auth_headers = metadata.get('auth_headers', {})
407
+ cookies = metadata.get('cookies', {})
408
+
409
+ # Build headers dict
410
+ headers = dict(auth_headers) if auth_headers else {}
411
+
412
+ # Add bearer token to headers if provided
413
+ if bearer_token:
414
+ headers['Authorization'] = f'Bearer {bearer_token}'
415
+
416
+ # Add cookies to headers if provided
417
+ # Cookies are sent via the Cookie header
418
+ if cookies:
419
+ cookie_str = '; '.join([f'{k}={v}' for k, v in cookies.items()])
420
+ headers['Cookie'] = cookie_str
421
+
422
+ # Create HTTP secret with headers if any are configured
423
+ if headers:
424
+ # Convert dict to DuckDB MAP format
425
+ headers_str = ', '.join([f"'{k}': '{v}'" for k, v in headers.items()])
426
+ conn.execute(f"""
427
+ CREATE OR REPLACE SECRET http_auth (
428
+ TYPE http,
429
+ EXTRA_HTTP_HEADERS MAP {{{headers_str}}}
430
+ );
431
+ """)
432
+
433
+ return True
434
+ except Exception:
435
+ return False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: api_dock
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: A flexible API gateway that allows you to proxy requests to multiple remote APIs and Databases
5
5
  Author-email: Brookie Guzder-Williams <bguzder-williams@berkeley.edu>
6
6
  License: BSd 3-clause
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "api_dock"
7
- version = "0.2.0"
7
+ version = "0.2.2"
8
8
  description = "A flexible API gateway that allows you to proxy requests to multiple remote APIs and Databases"
9
9
  readme = "README.md"
10
10
  license = {text = "BSd 3-clause"}
@@ -1,273 +0,0 @@
1
- """
2
-
3
- Storage Authentication Module for API Dock
4
-
5
- Handles authentication setup for various cloud storage backends (AWS S3, GCS, Azure, HTTP/HTTPS)
6
- in DuckDB queries. Supports both public and private files with credential chain authentication.
7
-
8
- License: BSD 3-Clause
9
-
10
- """
11
-
12
- #
13
- # IMPORTS
14
- #
15
- import re
16
- from typing import Any, Dict, List, Optional, Set
17
-
18
-
19
- #
20
- # CONSTANTS
21
- #
22
- # Storage backend detection patterns
23
- S3_PATTERN = re.compile(r'^s3[a]?://', re.IGNORECASE)
24
- GCS_PATTERN = re.compile(r'^gs://', re.IGNORECASE)
25
- AZURE_PATTERN = re.compile(r'^az[ure]*://', re.IGNORECASE)
26
- HTTP_PATTERN = re.compile(r'^https?://', re.IGNORECASE)
27
-
28
- # Storage backend types
29
- BACKEND_S3 = 's3'
30
- BACKEND_GCS = 'gcs'
31
- BACKEND_AZURE = 'azure'
32
- BACKEND_HTTP = 'http'
33
- BACKEND_LOCAL = 'local'
34
-
35
-
36
- #
37
- # PUBLIC
38
- #
39
- def detect_storage_backend(uri: str) -> str:
40
- """Detect the storage backend from a URI.
41
-
42
- Args:
43
- uri: File URI or path (e.g., "s3://bucket/file", "gs://bucket/file", "/path/to/file")
44
-
45
- Returns:
46
- Storage backend type: 's3', 'gcs', 'azure', 'http', or 'local'
47
- """
48
- if S3_PATTERN.match(uri):
49
- return BACKEND_S3
50
- elif GCS_PATTERN.match(uri):
51
- return BACKEND_GCS
52
- elif AZURE_PATTERN.match(uri):
53
- return BACKEND_AZURE
54
- elif HTTP_PATTERN.match(uri):
55
- return BACKEND_HTTP
56
- else:
57
- return BACKEND_LOCAL
58
-
59
-
60
- def extract_table_uris(database_config: Dict[str, Any]) -> List[str]:
61
- """Extract all table URIs from database configuration.
62
-
63
- Args:
64
- database_config: Database configuration dictionary with 'tables' section.
65
-
66
- Returns:
67
- List of table URIs/paths.
68
- """
69
- tables = database_config.get('tables', {})
70
- return list(tables.values())
71
-
72
-
73
- def detect_required_backends(table_uris: List[str]) -> Set[str]:
74
- """Detect which storage backends are needed for a list of table URIs.
75
-
76
- Args:
77
- table_uris: List of table URIs/paths.
78
-
79
- Returns:
80
- Set of required backend types (e.g., {'s3', 'gcs', 'local'})
81
- """
82
- backends = set()
83
- for uri in table_uris:
84
- backend = detect_storage_backend(uri)
85
- backends.add(backend)
86
- return backends
87
-
88
-
89
- def setup_storage_authentication(conn: Any, backends: Set[str]) -> Dict[str, bool]:
90
- """Setup authentication for required storage backends in DuckDB connection.
91
-
92
- This function attempts to configure authentication for each required backend.
93
- It gracefully handles failures, allowing queries to proceed with public files
94
- or when credentials are not needed.
95
-
96
- Supported backends:
97
- - S3: Uses AWS credential chain (env vars, config files, IAM roles)
98
- - GCS: Uses GCS credential chain (service account, HMAC keys)
99
- - Azure: Uses Azure credential chain (env vars, managed identity)
100
- - HTTP/HTTPS: Uses httpfs extension (supports public files)
101
-
102
- Args:
103
- conn: DuckDB connection object.
104
- backends: Set of required backend types.
105
-
106
- Returns:
107
- Dictionary mapping backend names to setup success status.
108
- True means authentication was configured, False means it failed but
109
- the query may still work with public files.
110
- """
111
- results = {}
112
-
113
- # Setup S3 authentication (AWS)
114
- if BACKEND_S3 in backends:
115
- results[BACKEND_S3] = _setup_s3_auth(conn)
116
-
117
- # Setup GCS authentication (Google Cloud Storage)
118
- if BACKEND_GCS in backends:
119
- results[BACKEND_GCS] = _setup_gcs_auth(conn)
120
-
121
- # Setup Azure authentication
122
- if BACKEND_AZURE in backends:
123
- results[BACKEND_AZURE] = _setup_azure_auth(conn)
124
-
125
- # Setup HTTP/HTTPS support
126
- if BACKEND_HTTP in backends:
127
- results[BACKEND_HTTP] = _setup_http_support(conn)
128
-
129
- # Local files don't need authentication
130
- if BACKEND_LOCAL in backends:
131
- results[BACKEND_LOCAL] = True
132
-
133
- return results
134
-
135
-
136
- #
137
- # INTERNAL
138
- #
139
- def _setup_s3_auth(conn: Any) -> bool:
140
- """Setup AWS S3 authentication using credential chain.
141
-
142
- Attempts to configure S3 access using AWS credential chain which automatically
143
- discovers credentials from environment variables, config files, IAM roles, etc.
144
-
145
- Args:
146
- conn: DuckDB connection object.
147
-
148
- Returns:
149
- True if setup succeeded, False if it failed (but query may still work with public files).
150
- """
151
- try:
152
- conn.execute("INSTALL aws;")
153
- conn.execute("LOAD aws;")
154
-
155
- # Configure S3 authentication using AWS credential chain
156
- # This automatically discovers credentials from:
157
- # - Environment variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN)
158
- # - AWS config files (~/.aws/credentials, ~/.aws/config)
159
- # - IAM roles (EC2, ECS, EKS, Lambda)
160
- # - SSO credentials
161
- # - Other AWS SDK credential providers
162
- conn.execute("""
163
- CREATE OR REPLACE SECRET (
164
- TYPE s3,
165
- PROVIDER credential_chain
166
- );
167
- """)
168
- return True
169
- except Exception:
170
- # Authentication setup failed, but public S3 files may still work
171
- return False
172
-
173
-
174
- def _setup_gcs_auth(conn: Any) -> bool:
175
- """Setup GCS authentication using credential chain.
176
-
177
- Attempts to configure GCS access using credential chain which automatically
178
- discovers credentials from environment variables, service account files, etc.
179
-
180
- Args:
181
- conn: DuckDB connection object.
182
-
183
- Returns:
184
- True if setup succeeded, False if it failed (but query may still work with public files).
185
- """
186
- try:
187
- # Install httpfs extension (required for GCS)
188
- conn.execute("INSTALL httpfs;")
189
- conn.execute("LOAD httpfs;")
190
-
191
- # Configure GCS authentication using credential chain
192
- # This automatically discovers credentials from:
193
- # - Environment variables (GCS_ACCESS_KEY_ID, GCS_SECRET_ACCESS_KEY)
194
- # - Service account files (GOOGLE_APPLICATION_CREDENTIALS)
195
- # - HMAC keys from GCS settings
196
- conn.execute("""
197
- CREATE OR REPLACE SECRET (
198
- TYPE gcs,
199
- PROVIDER credential_chain
200
- );
201
- """)
202
- return True
203
- except Exception:
204
- # Authentication setup failed, but public GCS files may still work
205
- return False
206
-
207
-
208
- def _setup_azure_auth(conn: Any) -> bool:
209
- """Setup Azure Blob Storage authentication using credential chain.
210
-
211
- Attempts to configure Azure access using credential chain which automatically
212
- discovers credentials from environment variables, managed identity, etc.
213
-
214
- Args:
215
- conn: DuckDB connection object.
216
-
217
- Returns:
218
- True if setup succeeded, False if it failed (but query may still work with public files).
219
- """
220
- try:
221
- conn.execute("INSTALL azure;")
222
- conn.execute("LOAD azure;")
223
-
224
- # Configure Azure authentication using credential chain
225
- # This automatically discovers credentials from:
226
- # - Environment variables (AZURE_STORAGE_CONNECTION_STRING, AZURE_STORAGE_ACCOUNT, etc.)
227
- # - Managed Identity (when running on Azure)
228
- # - Azure CLI credentials
229
- conn.execute("""
230
- CREATE OR REPLACE SECRET (
231
- TYPE azure,
232
- PROVIDER credential_chain
233
- );
234
- """)
235
- return True
236
- except Exception:
237
- # Authentication setup failed, but public Azure files may still work
238
- return False
239
-
240
-
241
- def _setup_http_support(conn: Any) -> bool:
242
- """Setup HTTP/HTTPS support.
243
-
244
- Installs httpfs extension for HTTP/HTTPS access. Note that HTTP authentication
245
- (bearer tokens, custom headers) can be configured separately if needed.
246
-
247
- Args:
248
- conn: DuckDB connection object.
249
-
250
- Returns:
251
- True if setup succeeded, False if it failed.
252
- """
253
- try:
254
- # Install httpfs extension (supports HTTP/HTTPS)
255
- conn.execute("INSTALL httpfs;")
256
- conn.execute("LOAD httpfs;")
257
-
258
- # Note: HTTP authentication (if needed) can be configured with:
259
- # CREATE SECRET http_auth (
260
- # TYPE http,
261
- # BEARER_TOKEN 'token'
262
- # );
263
- # or
264
- # CREATE SECRET http_auth (
265
- # TYPE http,
266
- # EXTRA_HTTP_HEADERS MAP {
267
- # 'Authorization': 'Bearer token'
268
- # }
269
- # );
270
-
271
- return True
272
- except Exception:
273
- return False
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes