adss 1.0__py3-none-any.whl → 1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adss/__init__.py +24 -0
- adss/auth.py +121 -0
- adss/client.py +671 -0
- adss/endpoints/__init__.py +14 -0
- adss/endpoints/admin.py +433 -0
- adss/endpoints/images.py +898 -0
- adss/endpoints/metadata.py +216 -0
- adss/endpoints/queries.py +498 -0
- adss/endpoints/users.py +311 -0
- adss/exceptions.py +57 -0
- adss/models/__init__.py +13 -0
- adss/models/metadata.py +138 -0
- adss/models/query.py +134 -0
- adss/models/user.py +123 -0
- adss/utils.py +107 -0
- {adss-1.0.dist-info → adss-1.2.dist-info}/METADATA +1 -1
- adss-1.2.dist-info/RECORD +30 -0
- {adss-1.0.dist-info → adss-1.2.dist-info}/WHEEL +1 -1
- adss-1.0.dist-info/RECORD +0 -16
- {adss-1.0.dist-info → adss-1.2.dist-info}/LICENSE +0 -0
- {adss-1.0.dist-info → adss-1.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,498 @@
|
|
1
|
+
"""
|
2
|
+
Query execution and management functionality for the Astronomy TAP Client.
|
3
|
+
"""
|
4
|
+
import time
|
5
|
+
import requests
|
6
|
+
from typing import Dict, List, Optional, Union, Any, BinaryIO, Tuple
|
7
|
+
import io
|
8
|
+
import pandas as pd
|
9
|
+
|
10
|
+
from adss.exceptions import QueryExecutionError, ResourceNotFoundError
|
11
|
+
from adss.utils import handle_response_errors, parquet_to_dataframe
|
12
|
+
from adss.models.query import Query, QueryResult
|
13
|
+
|
14
|
+
|
15
|
+
class QueriesEndpoint:
    """
    Handles query execution and management.

    Every HTTP call is delegated to ``auth_manager.request`` and each response
    is passed through ``handle_response_errors`` before it is parsed.
    """

    def __init__(self, base_url: str, auth_manager):
        """
        Initialize the Queries endpoint.

        Args:
            base_url: The base URL of the API server (trailing slashes are stripped)
            auth_manager: Authentication manager used to send the requests
        """
        self.base_url = base_url.rstrip('/')
        self.auth_manager = auth_manager

    def _submit_query(self, url, query, mode, file, table_name, **request_kwargs):
        """
        POST a query, optionally with a table upload, and return the raw response.

        Shared by execute_sync and execute_async so both treat uploads identically.

        Args:
            url: Endpoint path to POST to
            query: The query text
            mode: Query mode sent alongside the query
            file: Optional file path or file-like object to upload
            table_name: Name for the uploaded table (required if file is provided)
            **request_kwargs: Extra keyword arguments forwarded to auth_manager.request

        Returns:
            The response object returned by auth_manager.request

        Raises:
            ValueError: If a file is given without a table_name
        """
        data = {"query": query, "mode": mode}

        if not file:
            return self.auth_manager.request(
                method="POST",
                url=url,
                data=data,
                **request_kwargs
            )

        if not table_name:
            raise ValueError("table_name is required when uploading a file")
        data["table_name"] = table_name

        # Don't include content-type in headers as requests will set it for
        # multipart/form-data.
        if isinstance(file, str):
            # A path was given, so this method owns the handle and must close it
            # once the request has returned.
            with open(file, 'rb') as upload:
                return self.auth_manager.request(
                    method="POST",
                    url=url,
                    data=data,
                    files={"file": upload},
                    **request_kwargs
                )

        # A caller-supplied file object is left open for the caller to manage.
        return self.auth_manager.request(
            method="POST",
            url=url,
            data=data,
            files={"file": file},
            **request_kwargs
        )

    def execute_sync(self,
                     query: str,
                     mode: str = 'adql',
                     file: Optional[Union[str, BinaryIO]] = None,
                     table_name: Optional[str] = None,
                     **kwargs) -> QueryResult:
        """
        Execute a query synchronously and return the results.

        Args:
            query: The query to execute (ADQL or SQL)
            mode: Query mode ('adql' or 'sql')
            file: Optional file path or file-like object to upload as a temporary table
            table_name: Name for the uploaded table (required if file is provided)
            **kwargs: Additional keyword arguments to pass to the request (e.g., verify=False)

        Returns:
            QueryResult object containing the query data and metadata

        Raises:
            ValueError: If a file is given without a table_name
            QueryExecutionError: If the response reports an error or cannot be parsed.
                Errors raised while sending the request itself are not wrapped.
        """
        response = self._submit_query("/adss/sync", query, mode, file, table_name, **kwargs)

        try:
            handle_response_errors(response)

            # Metadata travels in response headers; a missing header counts as 0.
            execution_time = int(response.headers.get('X-Execution-Time-Ms', 0))
            row_count = int(response.headers.get('X-Row-Count', 0))

            # Create a minimal Query object for the QueryResult
            query_obj = Query(
                id="sync_query",  # Synchronous queries don't have an ID
                query_text=query,
                status="COMPLETED",
                created_at=pd.Timestamp.now(),
                mode=mode,
                completed_at=pd.Timestamp.now(),
                execution_time_ms=execution_time,
                row_count=row_count
            )

            # Parse Parquet data
            df = parquet_to_dataframe(response.content)

            return QueryResult(
                query=query_obj,
                data=df,
                execution_time_ms=execution_time,
                row_count=row_count,
                # A result with zero rows still has its columns.
                column_count=len(df.columns)
            )

        except Exception as e:
            raise QueryExecutionError(f"Synchronous query execution failed: {e}", query) from e

    def execute_async(self,
                      query: str,
                      mode: str = 'adql',
                      file: Optional[Union[str, BinaryIO]] = None,
                      table_name: Optional[str] = None,
                      **kwargs) -> Query:
        """
        Start an asynchronous query execution.

        Args:
            query: The query to execute (ADQL or SQL)
            mode: Query mode ('adql' or 'sql')
            file: Optional file path or file-like object to upload as a temporary table
            table_name: Name for the uploaded table (required if file is provided)
            **kwargs: Additional keyword arguments to pass to the request (e.g., verify=False)

        Returns:
            Query object built from the JSON body of the response

        Raises:
            ValueError: If a file is given without a table_name
            QueryExecutionError: If the response reports an error or cannot be parsed.
                Errors raised while sending the request itself are not wrapped.
        """
        response = self._submit_query(
            "/adss/async", query, mode, file, table_name,
            auth_required=True,
            **kwargs
        )

        try:
            handle_response_errors(response)
            job_data = response.json()
            return Query.from_dict(job_data)

        except Exception as e:
            raise QueryExecutionError(f"Failed to start asynchronous query: {e}", query) from e

    def get_status(self, query_id: str, **kwargs) -> Query:
        """
        Get the status of an asynchronous query.

        Args:
            query_id: ID of the query to check
            **kwargs: Additional keyword arguments to pass to the request (e.g., verify=False)

        Returns:
            Query object built from the JSON body of the response

        Raises:
            ResourceNotFoundError: Propagated unchanged when raised during the call
            QueryExecutionError: If the request or response handling fails in any other way
        """
        try:
            response = self.auth_manager.request(
                method="GET",
                url=f"/adss/async/{query_id}",
                auth_required=True,
                **kwargs
            )
            handle_response_errors(response)

            job_data = response.json()
            return Query.from_dict(job_data)

        except ResourceNotFoundError:
            raise
        except Exception as e:
            raise QueryExecutionError(f"Failed to get query status: {e}") from e

    def get_results(self, query_id: str, **kwargs) -> QueryResult:
        """
        Get the results of a completed asynchronous query.

        The query status is fetched first; results are only requested when the
        query reports itself complete and not failed.

        Args:
            query_id: ID of the completed query
            **kwargs: Additional keyword arguments to pass to the request (e.g., verify=False)

        Returns:
            QueryResult object with the query data

        Raises:
            ResourceNotFoundError: If the status lookup reports the query as not found
            QueryExecutionError: If the query is not completed, has failed, or the
                results can't be retrieved or parsed
        """
        # First get the query status
        query = self.get_status(query_id, **kwargs)

        if not query.is_complete:
            raise QueryExecutionError(
                f"Cannot get results: Query is not completed (status: {query.status})",
                query_id
            )

        if query.is_failed:
            raise QueryExecutionError(
                f"Cannot get results: Query failed with error: {query.error}",
                query_id
            )

        # Get the results
        try:
            response = self.auth_manager.request(
                method="GET",
                url=f"/adss/async/{query_id}/results",
                auth_required=True,
                **kwargs
            )
            handle_response_errors(response)

            # Parse Parquet data
            df = parquet_to_dataframe(response.content)

            # The expiry time, when the server sends one, arrives as a header.
            expires_at = response.headers.get('X-Expires-At')
            if expires_at:
                query.expires_at = pd.Timestamp(expires_at)

            return QueryResult(
                query=query,
                data=df,
                execution_time_ms=query.execution_time_ms,
                # Fall back to the parsed frame when the status carries no row count.
                row_count=query.row_count or len(df),
                # A result with zero rows still has its columns.
                column_count=len(df.columns)
            )

        except Exception as e:
            raise QueryExecutionError(f"Failed to get query results: {e}", query_id) from e

    def cancel_query(self, query_id: str, **kwargs) -> bool:
        """
        Cancel an asynchronous query.

        Args:
            query_id: ID of the query to cancel
            **kwargs: Additional keyword arguments to pass to the request (e.g., verify=False)

        Returns:
            True if the DELETE request completed without an error

        Raises:
            ResourceNotFoundError: Propagated unchanged when raised during the call
            QueryExecutionError: If canceling the query fails in any other way
        """
        try:
            response = self.auth_manager.request(
                method="DELETE",
                url=f"/adss/async/{query_id}",
                auth_required=True,
                **kwargs
            )
            handle_response_errors(response)

            return True

        except ResourceNotFoundError:
            raise
        except Exception as e:
            raise QueryExecutionError(f"Failed to cancel query: {e}", query_id) from e

    def wait_for_completion(self,
                            query_id: str,
                            timeout: Optional[int] = None,
                            poll_interval: int = 2,
                            **kwargs) -> Query:
        """
        Wait for an asynchronous query to complete.

        The status is always checked at least once before the timeout is
        evaluated, so an already-finished query is returned even with timeout=0.

        Args:
            query_id: ID of the query to wait for
            timeout: Maximum time to wait in seconds (None for no timeout)
            poll_interval: Time between status checks in seconds
            **kwargs: Additional keyword arguments to pass to the request (e.g., verify=False)

        Returns:
            The Query object from the first status check that reports completion.
            Callers must inspect it themselves to see whether the query failed.

        Raises:
            ResourceNotFoundError: If a status check reports the query as not found
            TimeoutError: If the query doesn't complete within the timeout
            QueryExecutionError: If a status check fails
        """
        # A monotonic clock keeps the deadline immune to system clock changes.
        # `is not None` makes timeout=0 a real (immediate) deadline, not "forever".
        deadline = None if timeout is None else time.monotonic() + timeout

        while True:
            query = self.get_status(query_id, **kwargs)

            if query.is_complete:
                return query

            if deadline is None:
                time.sleep(poll_interval)
                continue

            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise TimeoutError(f"Query did not complete within {timeout} seconds")

            # Never sleep past the deadline, so the last check happens on time.
            time.sleep(min(poll_interval, remaining))

    def execute_and_wait(self,
                         query: str,
                         mode: str = 'adql',
                         file: Optional[Union[str, BinaryIO]] = None,
                         table_name: Optional[str] = None,
                         timeout: Optional[int] = None,
                         poll_interval: int = 2,
                         **kwargs) -> QueryResult:
        """
        Execute a query asynchronously and wait for the results.

        Args:
            query: The query to execute (ADQL or SQL)
            mode: Query mode ('adql' or 'sql')
            file: Optional file path or file-like object to upload as a temporary table
            table_name: Name for the uploaded table (required if file is provided)
            timeout: Maximum time to wait in seconds (None for no timeout)
            poll_interval: Time between status checks in seconds
            **kwargs: Additional keyword arguments passed to every request made
                along the way (e.g., verify=False)

        Returns:
            QueryResult object containing the query data and metadata

        Raises:
            ValueError: If a file is given without a table_name
            QueryExecutionError: If the query execution fails
            TimeoutError: If the query doesn't complete within the timeout
        """
        # Start async query
        query_obj = self.execute_async(query, mode, file, table_name, **kwargs)

        # Wait for completion
        completed_query = self.wait_for_completion(query_obj.id, timeout, poll_interval, **kwargs)

        if completed_query.is_failed:
            raise QueryExecutionError(
                f"Query failed with error: {completed_query.error}",
                query
            )

        # Get results
        return self.get_results(completed_query.id, **kwargs)

    def get_history(self, limit: int = 50, **kwargs) -> List[Query]:
        """
        Get the current user's query history.

        Args:
            limit: Maximum number of queries to return (sent as the `limit` query parameter)
            **kwargs: Additional keyword arguments to pass to the request (e.g., verify=False)

        Returns:
            List of Query objects, one per entry in the JSON response

        Raises:
            QueryExecutionError: If the request or response handling fails for any reason
        """
        params = {"limit": limit}

        try:
            response = self.auth_manager.request(
                method="GET",
                url="/adss/v1/queries/me",
                params=params,
                auth_required=True,
                **kwargs
            )
            handle_response_errors(response)

            queries_data = response.json()
            return [Query.from_dict(q) for q in queries_data]

        except Exception as e:
            raise QueryExecutionError(f"Failed to get query history: {e}") from e

    def get_query_details(self, query_id: str, **kwargs) -> Query:
        """
        Get detailed information about a specific query.

        Args:
            query_id: ID of the query
            **kwargs: Additional keyword arguments to pass to the request (e.g., verify=False)

        Returns:
            Query object built from the JSON body of the response

        Raises:
            ResourceNotFoundError: Propagated unchanged when raised during the call
            QueryExecutionError: If the request or response handling fails in any other way
        """
        try:
            response = self.auth_manager.request(
                method="GET",
                url=f"/adss/v1/queries/{query_id}",
                auth_required=True,
                **kwargs
            )
            handle_response_errors(response)

            query_data = response.json()
            return Query.from_dict(query_data)

        except ResourceNotFoundError:
            raise
        except Exception as e:
            raise QueryExecutionError(f"Failed to get query details: {e}") from e

    def delete_query_from_history(self, query_id: str, **kwargs) -> bool:
        """
        Delete a query from the user's history.

        Args:
            query_id: ID of the query to delete
            **kwargs: Additional keyword arguments to pass to the request (e.g., verify=False)

        Returns:
            True if the DELETE request completed without an error

        Raises:
            ResourceNotFoundError: Propagated unchanged when raised during the call
            QueryExecutionError: If the request or response handling fails in any other way
        """
        try:
            response = self.auth_manager.request(
                method="DELETE",
                url=f"/adss/v1/queries/{query_id}",
                auth_required=True,
                **kwargs
            )
            handle_response_errors(response)

            return True

        except ResourceNotFoundError:
            raise
        except Exception as e:
            raise QueryExecutionError(f"Failed to delete query from history: {e}") from e
|