adss 1.0__py3-none-any.whl → 1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,498 @@
1
+ """
2
+ Query execution and management functionality for the Astronomy TAP Client.
3
+ """
4
+ import time
5
+ import requests
6
+ from typing import Dict, List, Optional, Union, Any, BinaryIO, Tuple
7
+ import io
8
+ import pandas as pd
9
+
10
+ from adss.exceptions import QueryExecutionError, ResourceNotFoundError
11
+ from adss.utils import handle_response_errors, parquet_to_dataframe
12
+ from adss.models.query import Query, QueryResult
13
+
14
+
15
+ class QueriesEndpoint:
16
+ """
17
+ Handles query execution and management.
18
+ """
19
+
20
def __init__(self, base_url: str, auth_manager):
    """
    Set up the endpoint with a server address and an auth manager.

    Args:
        base_url: Base URL of the API server; trailing slashes are removed
            before it is stored
        auth_manager: Object used by the other methods to issue requests
            (they call its ``request`` method)
    """
    normalized_url = base_url.rstrip('/')
    self.base_url = normalized_url
    self.auth_manager = auth_manager
30
+
31
def execute_sync(self,
                 query: str,
                 mode: str = 'adql',
                 file: Optional[Union[str, BinaryIO]] = None,
                 table_name: Optional[str] = None,
                 **kwargs) -> QueryResult:
    """
    Execute a query synchronously and return the results.

    The query is POSTed to ``/adss/sync`` through the auth manager. The
    response body is parsed as Parquet, and the ``X-Execution-Time-Ms`` and
    ``X-Row-Count`` response headers provide the result metadata (each
    defaults to 0 when the header is absent).

    Args:
        query: The query to execute (ADQL or SQL)
        mode: Query mode ('adql' or 'sql')
        file: Optional file path or file-like object to upload as a temporary table.
            A path is opened in binary mode and closed again after the request;
            a file-like object is left open for the caller to manage.
        table_name: Name for the uploaded table (required if file is provided)
        **kwargs: Additional keyword arguments to pass to the request (e.g., verify=False)

    Returns:
        QueryResult object containing the query data and metadata

    Raises:
        ValueError: If a file is provided without a table_name
        QueryExecutionError: If the response signals an error or cannot be
            parsed; the original exception is attached as ``__cause__``
    """
    # Don't include content-type in headers as requests will set it for multipart/form-data
    data = {
        "query": query,
        "mode": mode
    }
    request_args = {
        "method": "POST",
        "url": "/adss/sync",
        "data": data
    }

    # Only a file we opened ourselves is ours to close.
    opened_here = None

    if file:
        if not table_name:
            raise ValueError("table_name is required when uploading a file")

        if isinstance(file, str):
            opened_here = open(file, 'rb')

        data["table_name"] = table_name
        request_args["files"] = {
            "file": opened_here if opened_here is not None else file
        }

    try:
        response = self.auth_manager.request(**request_args, **kwargs)
    finally:
        if opened_here is not None:
            opened_here.close()

    try:
        handle_response_errors(response)

        # Extract metadata from headers
        execution_time = int(response.headers.get('X-Execution-Time-Ms', 0))
        row_count = int(response.headers.get('X-Row-Count', 0))

        # Create a minimal Query object for the QueryResult
        query_obj = Query(
            id="sync_query",  # Synchronous queries don't have an ID
            query_text=query,
            status="COMPLETED",
            created_at=pd.Timestamp.now(),
            mode=mode,
            completed_at=pd.Timestamp.now(),
            execution_time_ms=execution_time,
            row_count=row_count
        )

        # Parse Parquet data
        df = parquet_to_dataframe(response.content)

        return QueryResult(
            query=query_obj,
            data=df,
            execution_time_ms=execution_time,
            row_count=row_count,
            # A zero-row result still has columns; DataFrame.empty is True
            # whenever either axis is empty, so it must not gate this count.
            column_count=len(df.columns)
        )

    except Exception as e:
        raise QueryExecutionError(f"Synchronous query execution failed: {str(e)}", query) from e
131
+
132
def execute_async(self,
                  query: str,
                  mode: str = 'adql',
                  file: Optional[Union[str, BinaryIO]] = None,
                  table_name: Optional[str] = None,
                  **kwargs) -> Query:
    """
    Start an asynchronous query execution.

    The query is POSTed to ``/adss/async`` with ``auth_required=True`` and
    the JSON response is converted with ``Query.from_dict``.

    Args:
        query: The query to execute (ADQL or SQL)
        mode: Query mode ('adql' or 'sql')
        file: Optional file path or file-like object to upload as a temporary table.
            A path is opened in binary mode and closed again after the request;
            a file-like object is left open for the caller to manage.
        table_name: Name for the uploaded table (required if file is provided)
        **kwargs: Additional keyword arguments to pass to the request (e.g., verify=False)

    Returns:
        Query object with status information

    Raises:
        ValueError: If a file is provided without a table_name
        QueryExecutionError: If the response signals an error or cannot be
            decoded; the original exception is attached as ``__cause__``
    """
    data = {
        "query": query,
        "mode": mode
    }
    request_args = {
        "method": "POST",
        "url": "/adss/async",
        "data": data,
        "auth_required": True
    }

    # Only a file we opened ourselves is ours to close.
    opened_here = None

    if file:
        if not table_name:
            raise ValueError("table_name is required when uploading a file")

        if isinstance(file, str):
            opened_here = open(file, 'rb')

        data["table_name"] = table_name
        request_args["files"] = {
            "file": opened_here if opened_here is not None else file
        }

    try:
        response = self.auth_manager.request(**request_args, **kwargs)
    finally:
        if opened_here is not None:
            opened_here.close()

    try:
        handle_response_errors(response)
        job_data = response.json()
        return Query.from_dict(job_data)

    except Exception as e:
        raise QueryExecutionError(f"Failed to start asynchronous query: {str(e)}", query) from e
208
+
209
def get_status(self, query_id: str, **kwargs) -> Query:
    """
    Get the status of an asynchronous query.

    Issues a GET to ``/adss/async/{query_id}`` and converts the JSON
    response with ``Query.from_dict``.

    Args:
        query_id: ID of the query to check
        **kwargs: Additional keyword arguments to pass to the request (e.g., verify=False)

    Returns:
        Updated Query object with current status

    Raises:
        ResourceNotFoundError: If the query is not found (propagated unchanged)
        QueryExecutionError: For any other failure; the original exception
            is attached as ``__cause__``
    """
    try:
        response = self.auth_manager.request(
            method="GET",
            url=f"/adss/async/{query_id}",
            auth_required=True,
            **kwargs
        )
        handle_response_errors(response)

        job_data = response.json()
        return Query.from_dict(job_data)

    except ResourceNotFoundError:
        raise
    except Exception as e:
        # Pass query_id along, as cancel_query and get_results do.
        raise QueryExecutionError(f"Failed to get query status: {str(e)}", query_id) from e
239
+
240
def get_results(self, query_id: str, **kwargs) -> QueryResult:
    """
    Get the results of a completed asynchronous query.

    The query status is fetched first via ``get_status``; results are only
    requested from ``/adss/async/{query_id}/results`` when the query is
    complete and not failed. The response body is parsed as Parquet, and an
    ``X-Expires-At`` header, when present, is stored on the query as
    ``expires_at``.

    Args:
        query_id: ID of the completed query
        **kwargs: Additional keyword arguments to pass to the request (e.g., verify=False)

    Returns:
        QueryResult object with the query data

    Raises:
        ResourceNotFoundError: If the status lookup reports the query as not found
        QueryExecutionError: If the query is not completed, has failed, or the
            results can't be retrieved. Any exception raised while fetching or
            parsing the results is wrapped, with the original attached as
            ``__cause__``.
    """
    # First get the query status
    query = self.get_status(query_id, **kwargs)

    if not query.is_complete:
        raise QueryExecutionError(
            f"Cannot get results: Query is not completed (status: {query.status})",
            query_id
        )

    if query.is_failed:
        raise QueryExecutionError(
            f"Cannot get results: Query failed with error: {query.error}",
            query_id
        )

    # Get the results
    try:
        response = self.auth_manager.request(
            method="GET",
            url=f"/adss/async/{query_id}/results",
            auth_required=True,
            **kwargs
        )
        handle_response_errors(response)

        # Parse Parquet data
        df = parquet_to_dataframe(response.content)

        # Extract metadata
        expires_at = response.headers.get('X-Expires-At')
        if expires_at:
            query.expires_at = pd.Timestamp(expires_at)

        return QueryResult(
            query=query,
            data=df,
            execution_time_ms=query.execution_time_ms,
            # Fall back to the frame length when the server gave no row count.
            row_count=query.row_count or len(df),
            # A zero-row result still has columns; DataFrame.empty is True
            # whenever either axis is empty, so it must not gate this count.
            column_count=len(df.columns)
        )

    except Exception as e:
        raise QueryExecutionError(f"Failed to get query results: {str(e)}", query_id) from e
298
+
299
def cancel_query(self, query_id: str, **kwargs) -> bool:
    """
    Cancel an asynchronous query.

    Issues a DELETE to ``/adss/async/{query_id}``.

    Args:
        query_id: ID of the query to cancel
        **kwargs: Additional keyword arguments to pass to the request (e.g., verify=False)

    Returns:
        True if the request completed without an error being raised

    Raises:
        ResourceNotFoundError: If the query is not found (propagated unchanged)
        QueryExecutionError: If canceling the query fails for any other
            reason; the original exception is attached as ``__cause__``
    """
    try:
        response = self.auth_manager.request(
            method="DELETE",
            url=f"/adss/async/{query_id}",
            auth_required=True,
            **kwargs
        )
        handle_response_errors(response)

        return True

    except ResourceNotFoundError:
        raise
    except Exception as e:
        raise QueryExecutionError(f"Failed to cancel query: {str(e)}", query_id) from e
329
+
330
def wait_for_completion(self,
                        query_id: str,
                        timeout: Optional[int] = None,
                        poll_interval: int = 2,
                        **kwargs) -> Query:
    """
    Wait for an asynchronous query to complete.

    Polls ``get_status`` until the returned query reports ``is_complete``.
    The status is always checked at least once, so an already-complete
    query is returned even with ``timeout=0``.

    Args:
        query_id: ID of the query to wait for
        timeout: Maximum time to wait in seconds (None for no timeout).
            ``0`` means "check once and give up", not "wait forever".
        poll_interval: Time between status checks in seconds
        **kwargs: Additional keyword arguments to pass to the request (e.g., verify=False)

    Returns:
        The first Query object whose ``is_complete`` flag is set. This method
        does not inspect ``is_failed``; callers that care must check it
        themselves (as ``execute_and_wait`` does).

    Raises:
        TimeoutError: If the query doesn't complete within the timeout
        ResourceNotFoundError, QueryExecutionError: Propagated from
            ``get_status`` when a status check fails
    """
    # Use the monotonic clock so wall-clock adjustments cannot stretch or
    # shrink the timeout.
    deadline = None if timeout is None else time.monotonic() + timeout

    while True:
        query = self.get_status(query_id, **kwargs)

        if query.is_complete:
            return query

        delay = poll_interval
        if deadline is not None:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise TimeoutError(f"Query did not complete within {timeout} seconds")
            # Never sleep past the deadline.
            delay = min(poll_interval, remaining)

        time.sleep(delay)
364
+
365
def execute_and_wait(self,
                     query: str,
                     mode: str = 'adql',
                     file: Optional[Union[str, BinaryIO]] = None,
                     table_name: Optional[str] = None,
                     timeout: Optional[int] = None,
                     poll_interval: int = 2,
                     **kwargs) -> QueryResult:
    """
    Submit a query asynchronously, block until it finishes, and fetch its results.

    This chains ``execute_async``, ``wait_for_completion`` and
    ``get_results``; the same ``**kwargs`` are forwarded to all three.

    Args:
        query: The query to execute (ADQL or SQL)
        mode: Query mode ('adql' or 'sql')
        file: Optional file path or file-like object to upload as a temporary table
        table_name: Name for the uploaded table (required if file is provided)
        timeout: Maximum time to wait in seconds (None for no timeout)
        poll_interval: Time between status checks in seconds
        **kwargs: Additional keyword arguments to pass to the request (e.g., verify=False)

    Returns:
        QueryResult object containing the query data and metadata

    Raises:
        QueryExecutionError: If the query execution fails
        TimeoutError: If the query doesn't complete within the timeout
    """
    # Step 1: submit the job.
    submitted = self.execute_async(query, mode, file, table_name, **kwargs)

    # Step 2: poll until the job reports completion.
    finished = self.wait_for_completion(submitted.id, timeout, poll_interval, **kwargs)

    # Step 3: hand back the results unless the job ended in failure.
    if not finished.is_failed:
        return self.get_results(finished.id, **kwargs)

    raise QueryExecutionError(
        f"Query failed with error: {finished.error}",
        query
    )
406
+
407
def get_history(self, limit: int = 50, **kwargs) -> List[Query]:
    """
    Get the current user's query history.

    Issues a GET to ``/adss/v1/queries/me`` with ``limit`` as a query
    parameter and converts each returned item with ``Query.from_dict``.

    Args:
        limit: Maximum number of queries to return
        **kwargs: Additional keyword arguments to pass to the request (e.g., verify=False)

    Returns:
        List of Query objects representing past queries

    Raises:
        QueryExecutionError: If the request or response handling fails for
            any reason. Every exception raised inside this method is wrapped,
            so callers see this type rather than the underlying error, which
            is attached as ``__cause__``.
    """
    params = {"limit": limit}

    try:
        response = self.auth_manager.request(
            method="GET",
            url="/adss/v1/queries/me",
            params=params,
            auth_required=True,
            **kwargs
        )
        handle_response_errors(response)

        queries_data = response.json()
        return [Query.from_dict(q) for q in queries_data]

    except Exception as e:
        raise QueryExecutionError(f"Failed to get query history: {str(e)}") from e
438
+
439
def get_query_details(self, query_id: str, **kwargs) -> Query:
    """
    Get detailed information about a specific query.

    Issues a GET to ``/adss/v1/queries/{query_id}`` and converts the JSON
    response with ``Query.from_dict``.

    Args:
        query_id: ID of the query
        **kwargs: Additional keyword arguments to pass to the request (e.g., verify=False)

    Returns:
        Query object with detailed information

    Raises:
        ResourceNotFoundError: If the query is not found (propagated unchanged)
        QueryExecutionError: For any other failure; the original exception
            is attached as ``__cause__``
    """
    try:
        response = self.auth_manager.request(
            method="GET",
            url=f"/adss/v1/queries/{query_id}",
            auth_required=True,
            **kwargs
        )
        handle_response_errors(response)

        query_data = response.json()
        return Query.from_dict(query_data)

    except ResourceNotFoundError:
        raise
    except Exception as e:
        # Pass query_id along, as cancel_query and get_results do.
        raise QueryExecutionError(f"Failed to get query details: {str(e)}", query_id) from e
469
+
470
def delete_query_from_history(self, query_id: str, **kwargs) -> bool:
    """
    Delete a query from the user's history.

    Issues a DELETE to ``/adss/v1/queries/{query_id}``.

    Args:
        query_id: ID of the query to delete
        **kwargs: Additional keyword arguments to pass to the request (e.g., verify=False)

    Returns:
        True if the request completed without an error being raised

    Raises:
        ResourceNotFoundError: If the query is not found (propagated unchanged)
        QueryExecutionError: For any other failure; the original exception
            is attached as ``__cause__``
    """
    try:
        response = self.auth_manager.request(
            method="DELETE",
            url=f"/adss/v1/queries/{query_id}",
            auth_required=True,
            **kwargs
        )
        handle_response_errors(response)

        return True

    except ResourceNotFoundError:
        raise
    except Exception as e:
        # Pass query_id along, as cancel_query and get_results do.
        raise QueryExecutionError(f"Failed to delete query from history: {str(e)}", query_id) from e