adss-0.1-py3-none-any.whl → adss-1.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adss/__init__.py +24 -0
- adss/adss_manager.py +53 -0
- adss/auth.py +121 -0
- adss/client.py +671 -0
- adss/endpoints/__init__.py +14 -0
- adss/endpoints/admin.py +433 -0
- adss/endpoints/images.py +898 -0
- adss/endpoints/metadata.py +216 -0
- adss/endpoints/queries.py +498 -0
- adss/endpoints/users.py +311 -0
- adss/exceptions.py +57 -0
- adss/executors/async_query.py +4 -3
- adss/executors/sync_query.py +9 -3
- adss/models/__init__.py +13 -0
- adss/models/metadata.py +138 -0
- adss/models/query.py +134 -0
- adss/models/user.py +123 -0
- adss/table.py +295 -0
- adss/utils/__init__.py +0 -0
- adss/utils/format_table.py +115 -0
- adss/utils.py +107 -0
- adss-1.1.dist-info/LICENSE +11 -0
- {adss-0.1.dist-info → adss-1.1.dist-info}/METADATA +2 -2
- adss-1.1.dist-info/RECORD +30 -0
- {adss-0.1.dist-info → adss-1.1.dist-info}/WHEEL +1 -1
- adss-0.1.dist-info/RECORD +0 -11
- {adss-0.1.dist-info → adss-1.1.dist-info}/top_level.txt +0 -0
adss/models/query.py
ADDED
@@ -0,0 +1,134 @@
+"""
+Query-related data models for the Astronomy TAP Client.
+"""
+from dataclasses import dataclass
+from typing import Dict, Optional, Any, List
+from datetime import datetime
+import pandas as pd
+
+from ..utils import parse_datetime
+
+
+@dataclass
+class Query:
+    """
+    Represents a database query and its metadata.
+    """
+    id: str
+    query_text: str
+    status: str  # 'PENDING', 'QUEUED', 'RUNNING', 'COMPLETED', 'ERROR'
+    created_at: datetime
+    mode: str = 'adql'  # 'adql' or 'sql'
+    user_id: Optional[str] = None
+    completed_at: Optional[datetime] = None
+    result_url: Optional[str] = None
+    error: Optional[str] = None
+    execution_time_ms: Optional[int] = None
+    row_count: Optional[int] = None
+    position_in_queue: Optional[int] = None
+    expires_at: Optional[datetime] = None
+    query_metadata: Optional[Dict[str, Any]] = None
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'Query':
+        """Create a Query object from a dictionary."""
+        query_id = data.get('id')
+        query_text = data.get('query_text')
+        status = data.get('status')
+        mode = data.get('mode', 'adql')
+        user_id = data.get('user_id')
+
+        created_at = parse_datetime(data.get('created_at'))
+        completed_at = parse_datetime(data.get('completed_at'))
+        expires_at = parse_datetime(data.get('expires_at'))
+
+        result_url = data.get('result_url')
+        error = data.get('error')
+        execution_time_ms = data.get('execution_time_ms')
+        row_count = data.get('row_count')
+        position_in_queue = data.get('position_in_queue')
+        query_metadata = data.get('query_metadata')
+
+        return cls(
+            id=query_id,
+            query_text=query_text,
+            status=status,
+            mode=mode,
+            user_id=user_id,
+            created_at=created_at,
+            completed_at=completed_at,
+            result_url=result_url,
+            error=error,
+            execution_time_ms=execution_time_ms,
+            row_count=row_count,
+            position_in_queue=position_in_queue,
+            expires_at=expires_at,
+            query_metadata=query_metadata
+        )
+
+    @property
+    def is_complete(self) -> bool:
+        """Check if the query has completed (successfully or with error)."""
+        return self.status in ['COMPLETED', 'ERROR']
+
+    @property
+    def is_running(self) -> bool:
+        """Check if the query is currently running."""
+        return self.status == 'RUNNING'
+
+    @property
+    def is_queued(self) -> bool:
+        """Check if the query is queued."""
+        return self.status == 'QUEUED'
+
+    @property
+    def is_successful(self) -> bool:
+        """Check if the query completed successfully."""
+        return self.status == 'COMPLETED'
+
+    @property
+    def is_failed(self) -> bool:
+        """Check if the query failed."""
+        return self.status == 'ERROR'
+
+
+@dataclass
+class QueryResult:
+    """
+    Represents the result of a query, including the data and metadata.
+    """
+    query: Query
+    data: pd.DataFrame
+    execution_time_ms: Optional[int] = None
+    row_count: Optional[int] = None
+    column_count: Optional[int] = None
+
+    def to_csv(self, path: str, **kwargs) -> None:
+        """Save the query result to a CSV file."""
+        self.data.to_csv(path, **kwargs)
+
+    def to_parquet(self, path: str, **kwargs) -> None:
+        """Save the query result to a Parquet file."""
+        self.data.to_parquet(path, **kwargs)
+
+    def to_json(self, path: str = None, **kwargs) -> Optional[str]:
+        """
+        Convert the query result to JSON.
+        If path is provided, saves to file, otherwise returns a JSON string.
+        """
+        if path:
+            self.data.to_json(path, **kwargs)
+            return None
+        return self.data.to_json(**kwargs)
+
+    def head(self, n: int = 5) -> pd.DataFrame:
+        """Return the first n rows of the result."""
+        return self.data.head(n)
+
+    def tail(self, n: int = 5) -> pd.DataFrame:
+        """Return the last n rows of the result."""
+        return self.data.tail(n)
+
+    def describe(self) -> pd.DataFrame:
+        """Return summary statistics of the result."""
+        return self.data.describe()
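For orientation, a minimal usage sketch (not part of the diff): the payload below is hypothetical, its keys simply mirror what Query.from_dict reads, and it assumes parse_datetime accepts ISO-8601 strings.

import pandas as pd

from adss.models.query import Query, QueryResult

payload = {
    "id": "q-123",  # hypothetical identifiers
    "query_text": "SELECT TOP 10 * FROM some_catalog",
    "status": "COMPLETED",
    "mode": "adql",
    "created_at": "2024-01-01T12:00:00",  # assumes parse_datetime handles ISO strings
    "row_count": 10,
}

query = Query.from_dict(payload)
print(query.is_complete, query.is_successful)  # True True

# Wrap a DataFrame and export it through the convenience methods.
result = QueryResult(query=query, data=pd.DataFrame({"ra": [150.1], "dec": [2.2]}))
print(result.head())
result.to_csv("result.csv", index=False)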
adss/models/user.py
ADDED
@@ -0,0 +1,123 @@
+"""
+User-related data models for the Astronomy TAP Client.
+"""
+from dataclasses import dataclass, field
+from typing import List, Dict, Optional, Any
+from datetime import datetime
+
+from ..utils import parse_datetime
+
+
+@dataclass
+class SchemaPermission:
+    """Schema-level permission."""
+    schema_name: str
+    permission: str  # 'read', 'write', or 'all'
+
+
+@dataclass
+class TablePermission:
+    """Table-level permission."""
+    schema_name: str
+    table_name: str
+    permission: str  # 'read', 'write', or 'all'
+
+
+@dataclass
+class RolePermissions:
+    """Permissions associated with a role."""
+    schema_permissions: List[SchemaPermission] = field(default_factory=list)
+    table_permissions: List[TablePermission] = field(default_factory=list)
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'RolePermissions':
+        """Create a RolePermissions object from a dictionary."""
+        schema_perms = [
+            SchemaPermission(**p)
+            for p in data.get('schema_permissions', [])
+        ]
+
+        table_perms = [
+            TablePermission(**p)
+            for p in data.get('table_permissions', [])
+        ]
+
+        return cls(
+            schema_permissions=schema_perms,
+            table_permissions=table_perms
+        )
+
+
+@dataclass
+class Role:
+    """User role with associated permissions."""
+    id: int
+    name: str
+    description: Optional[str] = None
+    permissions: Optional[RolePermissions] = None
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'Role':
+        """Create a Role object from a dictionary."""
+        role_id = data.get('id')
+        name = data.get('name')
+        description = data.get('description')
+
+        permissions_data = data.get('permissions')
+        permissions = None
+        if permissions_data:
+            permissions = RolePermissions.from_dict(permissions_data)
+
+        return cls(
+            id=role_id,
+            name=name,
+            description=description,
+            permissions=permissions
+        )
+
+
+@dataclass
+class User:
+    """User model with authentication and role information."""
+    id: str
+    username: str
+    email: str
+    full_name: Optional[str] = None
+    is_active: bool = True
+    is_staff: bool = False
+    is_superuser: bool = False
+    created_at: Optional[datetime] = None
+    last_login: Optional[datetime] = None
+    roles: List[Role] = field(default_factory=list)
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'User':
+        """Create a User object from a dictionary."""
+        user_id = data.get('id')
+        username = data.get('username')
+        email = data.get('email')
+        full_name = data.get('full_name')
+        is_active = data.get('is_active', True)
+        is_staff = data.get('is_staff', False)
+        is_superuser = data.get('is_superuser', False)
+
+        created_at = parse_datetime(data.get('created_at'))
+        last_login = parse_datetime(data.get('last_login'))
+
+        roles = [
+            Role.from_dict(role_data)
+            for role_data in data.get('roles', [])
+        ]
+
+        return cls(
+            id=user_id,
+            username=username,
+            email=email,
+            full_name=full_name,
+            is_active=is_active,
+            is_staff=is_staff,
+            is_superuser=is_superuser,
+            created_at=created_at,
+            last_login=last_login,
+            roles=roles
+        )
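A similar sketch for the user models: the payload is hypothetical, its nested keys follow Role.from_dict and RolePermissions.from_dict above, and parse_datetime is assumed to tolerate missing timestamps.

from adss.models.user import User

payload = {
    "id": "u-1",
    "username": "astro",
    "email": "astro@example.com",
    "roles": [
        {
            "id": 1,
            "name": "collab",
            "permissions": {
                "schema_permissions": [{"schema_name": "dr1", "permission": "read"}],
                "table_permissions": [],
            },
        }
    ],
}

user = User.from_dict(payload)
print(user.username, [role.name for role in user.roles])           # astro ['collab']
print(user.roles[0].permissions.schema_permissions[0].permission)  # read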
adss/table.py
ADDED
@@ -0,0 +1,295 @@
+from adss.executors.sync_query import execute_sync
+from adss.executors.async_query import execute_async
+from adss.utils import format_table
+
+import re
+
+class Table:
+    def __init__(self, name, columns):
+        self.name = name
+        self.columns = columns
+        self.selected_columns = []
+        self.constrains = []
+
+    def __repr__(self):
+        return f"Table(name={self.name}, columns={len(self.columns)})"
+
+    def __str__(self):
+        return f"Table: {self.name} ({len(self.columns)} columns)"
+
+    def check_column(self, column):
+        return column in self.columns
+
+    def format_columns(self, columns):
+        # Use the provided columns list rather than an undefined variable
+        return ','.join(columns)
+
+    def set_columns(self, columns):
+        if not isinstance(columns, list):
+            columns = [columns]
+        for column in columns:
+            if not self.check_column(column):
+                raise ValueError(f"Column {column} not in table {self.name}, options are {self.columns}")
+        self.selected_columns = columns
+
+    def set_constrains(self, constrains):
+        self.constrains = constrains
+
+    def cone_search(self, ra, dec, radius_arcsec, columns=None, method='sync'):
+        if radius_arcsec <= 0:
+            raise ValueError("Radius must be positive")
+        if radius_arcsec > 60:
+            raise ValueError("Radius must be less than 60 arcsecs")
+
+        if columns:
+            columns_str = self.format_columns(columns)
+        elif self.selected_columns:
+            columns_str = self.format_columns(self.selected_columns)
+        else:
+            columns_str = "*"  # Select all columns
+
+        constraints_str = ""
+        if self.constrains:
+            constraints_str = " AND (" + self.constrains + ")"
+
+        query = f"""SELECT {columns_str} FROM {self.name}
+        WHERE 1 = CONTAINS(
+            POINT('ICRS', ra, dec),
+            CIRCLE('ICRS', {ra}, {dec}, {radius_arcsec}/3600.0)
+        ){constraints_str}
+        """
+
+        print(query)
+        if method == 'sync':
+            return execute_sync(query)
+        else:
+            return execute_async(query)
+
+    def cone_cross_match(
+        self,
+        other_table,
+        match_arcsec,
+        ra,
+        dec,
+        radius_arcsec,
+        columns=None,
+        other_columns=None,
+        other_suffix=None,
+        method='sync'
+    ):
+        """
+        Perform a cone search on the current table (t1) and then cross-match with another table (t2)
+        using a matching radius (match_arcsec).
+
+        The query first restricts table t1 to a cone centered at (ra, dec) with a radius of radius_arcsec.
+        Then, for each object in t1, it finds matching objects in table t2 that lie within match_arcsec
+        of the t1 object's coordinates.
+
+        Additionally:
+        - If a non-empty `other_suffix` is provided, each selected column from t2 will be aliased with that suffix.
+        - The constraints for each table are processed so that the columns in the conditions are properly qualified with t1 or t2.
+
+        Parameters:
+            other_table (Table): The table to match against (t2).
+            match_arcsec (float): The cross-match tolerance radius (in arcseconds) between t1 and t2.
+            ra (float): Right Ascension for the cone center (t1).
+            dec (float): Declination for the cone center (t1).
+            radius_arcsec (float): The cone search radius (in arcseconds) for filtering t1.
+            columns (list or None): Columns to select from the current table (t1).
+            other_columns (list or None): Columns to select from the other table (t2).
+            other_suffix (str or None): Optional suffix to append to each t2 column alias.
+            method (str): Use 'sync' for synchronous execution or 'async' for asynchronous.
+
+        Returns:
+            The result of the query execution via execute_sync or execute_async.
+        """
+
+        # Helper function to qualify constraint column names with the proper alias.
+        # It looks for each column name as a whole word (not already preceded by an alias) and prefixes it.
+        def apply_alias_to_constraint(constraint, alias, columns_list):
+            for col in columns_list:
+                # (?<![\w\.]) ensures that we do not match if the column is already prefixed (like t1.ra)
+                pattern = r'(?<![\w\.])\b' + re.escape(col) + r'\b'
+                constraint = re.sub(pattern, f"{alias}.{col}", constraint)
+            return constraint
+
+        # Validate match_arcsec
+        if match_arcsec <= 0:
+            raise ValueError("Match radius must be positive")
+        if match_arcsec > 3:
+            print("Match radius may be too large; consider a value less than 3 arcsecs")
+
+        # Determine columns for t1
+        if columns:
+            t1_columns_list = columns if isinstance(columns, list) else [columns]
+            t1_columns = ', '.join(f"t1.{col}" for col in t1_columns_list)
+        elif self.selected_columns:
+            t1_columns = ', '.join(f"t1.{col}" for col in self.selected_columns)
+        else:
+            t1_columns = "t1.*"
+
+        # Determine columns for t2, adding suffix if provided
+        if other_columns:
+            t2_columns_list = other_columns if isinstance(other_columns, list) else [other_columns]
+            if other_suffix:
+                t2_columns = ', '.join(f"t2.{col} AS {col}{other_suffix}" for col in t2_columns_list)
+            else:
+                t2_columns = ', '.join(f"t2.{col}" for col in t2_columns_list)
+        elif other_table.selected_columns:
+            if other_suffix:
+                t2_columns = ', '.join(f"t2.{col} AS {col}{other_suffix}" for col in other_table.selected_columns)
+            else:
+                t2_columns = ', '.join(f"t2.{col}" for col in other_table.selected_columns)
+        else:
+            t2_columns = "t2.*"
+
+        # Process constraints for t1: apply alias "t1" to each column mentioned in the constraint.
+        constraints_t1 = ""
+        if self.constrains:
+            if isinstance(self.constrains, str):
+                processed_constraint = apply_alias_to_constraint(self.constrains, "t1", self.columns)
+                constraints_t1 = " AND (" + processed_constraint + ")"
+            elif isinstance(self.constrains, list):
+                processed_constraints = []
+                for c in self.constrains:
+                    processed_constraints.append(apply_alias_to_constraint(c, "t1", self.columns))
+                constraints_t1 = " AND (" + " AND ".join(processed_constraints) + ")"
+
+        # Process constraints for t2: apply alias "t2" to each column mentioned in the constraint.
+        constraints_t2 = ""
+        if other_table.constrains:
+            if isinstance(other_table.constrains, str):
+                processed_constraint = apply_alias_to_constraint(other_table.constrains, "t2", other_table.columns)
+                constraints_t2 = " AND (" + processed_constraint + ")"
+            elif isinstance(other_table.constrains, list):
+                processed_constraints = []
+                for c in other_table.constrains:
+                    processed_constraints.append(apply_alias_to_constraint(c, "t2", other_table.columns))
+                constraints_t2 = " AND (" + " AND ".join(processed_constraints) + ")"
+        # Build the query:
+        # 1. The first CONTAINS clause performs the cross-match between t1 and t2 with match_arcsec tolerance.
+        # 2. The second CONTAINS clause restricts t1 objects to the cone centered at (ra, dec) with radius radius_arcsec.
+        query = f"""SELECT {t1_columns}, {t2_columns}
+        FROM {self.name} AS t1, {other_table.name} AS t2
+        WHERE 1 = CONTAINS(
+            POINT('ICRS', t2.ra, t2.dec),
+            CIRCLE('ICRS', t1.ra, t1.dec, {match_arcsec}/3600.0)
+        )
+        AND 1 = CONTAINS(
+            POINT('ICRS', t1.ra, t1.dec),
+            CIRCLE('ICRS', {ra}, {dec}, {radius_arcsec}/3600.0)
+        )
+        {constraints_t1}
+        {constraints_t2}
+        """
+
+        print(query)
+        if method == 'async':
+            return execute_async(query)
+        else:
+            return execute_sync(query)
+
+    def table_cross_match(
+        self,
+        other_table,
+        match_arcsec,
+        columns=None,
+        other_columns=None,
+        other_suffix=None,
+        method='async'
+    ):
+        """
+        Cross-match the current table (t1) against a user-supplied table
+        (a pandas DataFrame or astropy Table, uploaded as t2)
+        using a matching radius (match_arcsec).
+
+        For each object in t1, it finds matching objects in table t2 that lie within match_arcsec
+        of the t1 object's coordinates.
+
+        Additionally:
+        - If a non-empty `other_suffix` is provided, each selected column from t2 will be aliased with that suffix.
+        - Constraints set on the current table are processed so that the columns in the conditions are properly qualified with t1.
+
+        Parameters:
+            other_table (astropy.table.Table): The table to match against (t2).
+            match_arcsec (float): The cross-match tolerance radius (in arcseconds) between t1 and t2.
+            columns (list or None): Columns to select from the current table (t1).
+            other_columns (list or None): Columns to select from the other table (t2).
+            other_suffix (str or None): Optional suffix to append to each t2 column alias.
+            method (str): Only 'async' execution is currently supported.
+
+        Returns:
+            The result of the query execution via execute_async.
+        """
+
+        # Helper function to qualify constraint column names with the proper alias.
+        # It looks for each column name as a whole word (not already preceded by an alias) and prefixes it.
+        def apply_alias_to_constraint(constraint, alias, columns_list):
+            for col in columns_list:
+                # (?<![\w\.]) ensures that we do not match if the column is already prefixed (like t1.ra)
+                pattern = r'(?<![\w\.])\b' + re.escape(col) + r'\b'
+                constraint = re.sub(pattern, f"{alias}.{col}", constraint)
+            return constraint
+
+        # Validate match_arcsec
+        if match_arcsec <= 0:
+            raise ValueError("Match radius must be positive")
+        if match_arcsec > 3:
+            print("Match radius may be too large; consider a value less than 3 arcsecs")
+
+        # Determine columns for t1
+        if columns:
+            t1_columns_list = columns if isinstance(columns, list) else [columns]
+            t1_columns = ', '.join(f"t1.{col}" for col in t1_columns_list)
+        elif self.selected_columns:
+            t1_columns = ', '.join(f"t1.{col}" for col in self.selected_columns)
+        else:
+            t1_columns = "t1.*"
+
+        # Determine columns for t2, adding suffix if provided
+        if not other_columns:
+            raise ValueError("Must provide columns for the input table (other_columns param)")
+
+        if "ra" not in other_columns or "dec" not in other_columns:
+            raise ValueError("Input table must have 'ra' and 'dec' columns")
+
+        other_table = other_table[other_columns]
+
+        t2_columns_list = other_columns if isinstance(other_columns, list) else [other_columns]
+        if other_suffix:
+            t2_columns = ', '.join(f"t2.{col} AS {col}{other_suffix}" for col in t2_columns_list)
+        else:
+            t2_columns = ', '.join(f"t2.{col}" for col in t2_columns_list)
+
+        # Process constraints for t1: apply alias "t1" to each column mentioned in the constraint.
+        constraints_t1 = ""
+        if self.constrains:
+            if isinstance(self.constrains, str):
+                processed_constraint = apply_alias_to_constraint(self.constrains, "t1", self.columns)
+                constraints_t1 = " (" + processed_constraint + ")"
+            elif isinstance(self.constrains, list):
+                processed_constraints = []
+                for c in self.constrains:
+                    processed_constraints.append(apply_alias_to_constraint(c, "t1", self.columns))
+                constraints_t1 = " (" + " AND ".join(processed_constraints) + ")"
+
+        if constraints_t1:
+            constraints_t1 = "WHERE " + constraints_t1
+        # Build the query:
+        # The CONTAINS clause joins t1 to the uploaded table (t2) with match_arcsec tolerance;
+        # any constraints on t1 are appended as a trailing WHERE clause.
+        query = f"""SELECT {t1_columns}, {t2_columns}
+        FROM {self.name} AS t1 JOIN tap_upload.upload AS t2 ON
+        1 = CONTAINS(
+            POINT('ICRS', t1.ra, t1.dec),
+            CIRCLE('ICRS', t2.ra, t2.dec, {match_arcsec}/3600.0)
+        )
+        {constraints_t1}
+        """
+
+        print(query)
+        if method == 'async':
+            return execute_async(query, table_upload=other_table)
+        else:
+            raise ValueError("Synchronous execution not supported yet for table cross-match")
+        #return execute_sync(query)
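A usage sketch for Table (table names and columns are hypothetical; in practice they come from the service metadata endpoints).

from adss.table import Table

main = Table("dr1.detections", columns=["id", "ra", "dec", "mag_r"])
other = Table("dr1.photo_z", columns=["id", "ra", "dec", "z_phot"])

main.set_columns(["id", "ra", "dec"])
main.set_constrains("mag_r < 21")  # qualified as t1.mag_r in cross-matches

# 10-arcsec cone search, run synchronously.
cone = main.cone_search(ra=150.1, dec=2.2, radius_arcsec=10)

# Cross-match the same cone against the second table within 1 arcsec,
# suffixing t2 columns to avoid name clashes.
matched = main.cone_cross_match(
    other_table=other,
    match_arcsec=1.0,
    ra=150.1,
    dec=2.2,
    radius_arcsec=10,
    other_columns=["z_phot"],
    other_suffix="_pz",
)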
adss/utils/__init__.py
ADDED
File without changes
adss/utils/format_table.py
ADDED
@@ -0,0 +1,115 @@
+import numpy as np
+from astropy.table import Table
+
+def vectorized_string_to_masked_array(column_data):
+    """
+    Fully vectorized conversion of formatted string arrays to NumPy masked arrays.
+    The strings are assumed to be wrapped in curly braces (e.g. "{1,2,3}").
+    Any occurrence of the literal "NULL" in a cell will be masked.
+
+    Parameters
+    ----------
+    column_data : numpy.ndarray
+        A 1D NumPy array of strings. Each element is a formatted array like "{1,2,3}".
+
+    Returns
+    -------
+    numpy.ma.MaskedArray
+        A masked array where "NULL" entries are masked.
+    """
+    # Remove curly braces (but do not remove "NULL")
+    clean_data = np.char.replace(column_data.astype(str), "{", "")
+    clean_data = np.char.replace(clean_data, "}", "")
+
+    # Split each string by comma into a list of items (with possible surrounding whitespace)
+    split_arrays = np.char.split(clean_data, ",")
+
+    # --- Determine type by scanning for a first non-"NULL" value ---
+    first_value = None
+    for row in split_arrays:
+        for item in row:
+            item_str = item.strip()
+            if item_str != "NULL":
+                first_value = item_str
+                break
+        if first_value is not None:
+            break
+
+    # If no non-NULL value is found, default to a masked object array.
+    if first_value is None:
+        data = [np.array(row) for row in split_arrays]
+        mask = [np.full(len(row), True, dtype=bool) for row in split_arrays]
+        return np.ma.masked_array(data, mask=mask)
+
+    # Try to determine numeric type.
+    # (If first_value consists solely of digits, we'll assume integer.
+    # Otherwise, if it can be converted to float, we'll use float.
+    # Else, we default to string.)
+    is_integer = first_value.isdigit()
+    is_float = False
+    if not is_integer:
+        try:
+            float(first_value)
+            is_float = True
+        except Exception:
+            pass
+
+    # Prepare lists to store converted rows and corresponding masks.
+    data_list = []
+    mask_list = []
+
+    # Conversion helper function
+    def convert_item(item, conv):
+        item = item.strip()
+        if item == "NULL":
+            return None, True
+        else:
+            return conv(item), False
+
+    if is_integer:
+        conv_func = int
+        dtype = np.int64
+    elif is_float:
+        conv_func = float
+        dtype = np.float64
+    else:
+        conv_func = lambda x: x
+        dtype = object
+
+    # Process each row
+    for row in split_arrays:
+        row_vals = []
+        row_mask = []
+        for item in row:
+            val, is_mask = convert_item(item, conv_func)
+            # For masked numeric values, we insert a dummy (0 or 0.0) value.
+            if is_mask:
+                if dtype in (np.int64, np.float64):
+                    row_vals.append(0)
+                else:
+                    row_vals.append("")
+            else:
+                row_vals.append(val)
+            row_mask.append(is_mask)
+        # Convert row to an array of the target dtype.
+        row_arr = np.array(row_vals, dtype=dtype)
+        data_list.append(row_arr)
+        mask_list.append(np.array(row_mask, dtype=bool))
+
+    # Create and return a masked array.
+    return np.ma.masked_array(data_list, mask=mask_list)
+
+def format_result_table(tab):
+    if tab is None or len(tab) == 0:
+        return None
+
+    for col in tab.colnames:
+        if len(tab[col]) == 0:
+            continue
+        if "<U" not in str(tab[col].dtype):
+            continue
+
+        if "{" in tab[col][0]:
+            tab[col] = vectorized_string_to_masked_array(tab[col])
+
+    return tab
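A quick sketch of what vectorized_string_to_masked_array does with the service's array-valued cells (the input strings are illustrative):

import numpy as np

from adss.utils.format_table import vectorized_string_to_masked_array

raw = np.array(["{1,2,3}", "{4,NULL,6}"])
arr = vectorized_string_to_masked_array(raw)

print(arr[1])       # [4 -- 6]  (the NULL entry is masked)
print(arr.mask[1])  # [False  True False]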