adss 0.1-py3-none-any.whl → 1.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
adss/models/query.py ADDED
@@ -0,0 +1,134 @@
+ """
+ Query-related data models for the Astronomy TAP Client.
+ """
+ from dataclasses import dataclass
+ from typing import Dict, Optional, Any
+ from datetime import datetime
+ import pandas as pd
+
+ from ..utils import parse_datetime
+
+
+ @dataclass
+ class Query:
+     """
+     Represents a database query and its metadata.
+     """
+     id: str
+     query_text: str
+     status: str  # 'PENDING', 'QUEUED', 'RUNNING', 'COMPLETED', 'ERROR'
+     created_at: datetime
+     mode: str = 'adql'  # 'adql' or 'sql'
+     user_id: Optional[str] = None
+     completed_at: Optional[datetime] = None
+     result_url: Optional[str] = None
+     error: Optional[str] = None
+     execution_time_ms: Optional[int] = None
+     row_count: Optional[int] = None
+     position_in_queue: Optional[int] = None
+     expires_at: Optional[datetime] = None
+     query_metadata: Optional[Dict[str, Any]] = None
+
+     @classmethod
+     def from_dict(cls, data: Dict[str, Any]) -> 'Query':
+         """Create a Query object from a dictionary."""
+         query_id = data.get('id')
+         query_text = data.get('query_text')
+         status = data.get('status')
+         mode = data.get('mode', 'adql')
+         user_id = data.get('user_id')
+
+         created_at = parse_datetime(data.get('created_at'))
+         completed_at = parse_datetime(data.get('completed_at'))
+         expires_at = parse_datetime(data.get('expires_at'))
+
+         result_url = data.get('result_url')
+         error = data.get('error')
+         execution_time_ms = data.get('execution_time_ms')
+         row_count = data.get('row_count')
+         position_in_queue = data.get('position_in_queue')
+         query_metadata = data.get('query_metadata')
+
+         return cls(
+             id=query_id,
+             query_text=query_text,
+             status=status,
+             mode=mode,
+             user_id=user_id,
+             created_at=created_at,
+             completed_at=completed_at,
+             result_url=result_url,
+             error=error,
+             execution_time_ms=execution_time_ms,
+             row_count=row_count,
+             position_in_queue=position_in_queue,
+             expires_at=expires_at,
+             query_metadata=query_metadata
+         )
+
+     @property
+     def is_complete(self) -> bool:
+         """Check if the query has completed (successfully or with an error)."""
+         return self.status in ['COMPLETED', 'ERROR']
+
+     @property
+     def is_running(self) -> bool:
+         """Check if the query is currently running."""
+         return self.status == 'RUNNING'
+
+     @property
+     def is_queued(self) -> bool:
+         """Check if the query is queued."""
+         return self.status == 'QUEUED'
+
+     @property
+     def is_successful(self) -> bool:
+         """Check if the query completed successfully."""
+         return self.status == 'COMPLETED'
+
+     @property
+     def is_failed(self) -> bool:
+         """Check if the query failed."""
+         return self.status == 'ERROR'
+
+
+ @dataclass
+ class QueryResult:
+     """
+     Represents the result of a query, including the data and metadata.
+     """
+     query: Query
+     data: pd.DataFrame
+     execution_time_ms: Optional[int] = None
+     row_count: Optional[int] = None
+     column_count: Optional[int] = None
+
+     def to_csv(self, path: str, **kwargs) -> None:
+         """Save the query result to a CSV file."""
+         self.data.to_csv(path, **kwargs)
+
+     def to_parquet(self, path: str, **kwargs) -> None:
+         """Save the query result to a Parquet file."""
+         self.data.to_parquet(path, **kwargs)
+
+     def to_json(self, path: Optional[str] = None, **kwargs) -> Optional[str]:
+         """
+         Convert the query result to JSON.
+         If a path is provided, save to that file; otherwise return a JSON string.
+         """
+         if path:
+             self.data.to_json(path, **kwargs)
+             return None
+         return self.data.to_json(**kwargs)
+
+     def head(self, n: int = 5) -> pd.DataFrame:
+         """Return the first n rows of the result."""
+         return self.data.head(n)
+
+     def tail(self, n: int = 5) -> pd.DataFrame:
+         """Return the last n rows of the result."""
+         return self.data.tail(n)
+
+     def describe(self) -> pd.DataFrame:
+         """Return summary statistics of the result."""
+         return self.data.describe()
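
For context, Query.from_dict is the deserialization entry point for server responses, and the status properties wrap the raw status strings so callers never compare against them directly. A minimal usage sketch, assuming the payload shape implied by from_dict and that parse_datetime tolerates ISO strings and missing values; the payload itself is invented for illustration:

    import pandas as pd
    from adss.models.query import Query, QueryResult

    # Hypothetical job payload, shaped like the keys from_dict reads.
    payload = {
        'id': 'job-42',
        'query_text': 'SELECT TOP 10 * FROM catalog.sources',
        'status': 'COMPLETED',
        'created_at': '2024-01-01T00:00:00Z',
        'row_count': 10,
    }
    query = Query.from_dict(payload)
    assert query.is_complete and query.is_successful

    # Wrap a result DataFrame and export it.
    result = QueryResult(query=query, data=pd.DataFrame({'ra': [150.1], 'dec': [2.2]}))
    print(result.to_json())  # no path given, so a JSON string is returned
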
adss/models/user.py ADDED
@@ -0,0 +1,123 @@
+ """
+ User-related data models for the Astronomy TAP Client.
+ """
+ from dataclasses import dataclass, field
+ from typing import List, Dict, Optional, Any
+ from datetime import datetime
+
+ from ..utils import parse_datetime
+
+
+ @dataclass
+ class SchemaPermission:
+     """Schema-level permission."""
+     schema_name: str
+     permission: str  # 'read', 'write', or 'all'
+
+
+ @dataclass
+ class TablePermission:
+     """Table-level permission."""
+     schema_name: str
+     table_name: str
+     permission: str  # 'read', 'write', or 'all'
+
+
+ @dataclass
+ class RolePermissions:
+     """Permissions associated with a role."""
+     schema_permissions: List[SchemaPermission] = field(default_factory=list)
+     table_permissions: List[TablePermission] = field(default_factory=list)
+
+     @classmethod
+     def from_dict(cls, data: Dict[str, Any]) -> 'RolePermissions':
+         """Create a RolePermissions object from a dictionary."""
+         schema_perms = [
+             SchemaPermission(**p)
+             for p in data.get('schema_permissions', [])
+         ]
+
+         table_perms = [
+             TablePermission(**p)
+             for p in data.get('table_permissions', [])
+         ]
+
+         return cls(
+             schema_permissions=schema_perms,
+             table_permissions=table_perms
+         )
+
+
+ @dataclass
+ class Role:
+     """User role with associated permissions."""
+     id: int
+     name: str
+     description: Optional[str] = None
+     permissions: Optional[RolePermissions] = None
+
+     @classmethod
+     def from_dict(cls, data: Dict[str, Any]) -> 'Role':
+         """Create a Role object from a dictionary."""
+         role_id = data.get('id')
+         name = data.get('name')
+         description = data.get('description')
+
+         permissions_data = data.get('permissions')
+         permissions = None
+         if permissions_data:
+             permissions = RolePermissions.from_dict(permissions_data)
+
+         return cls(
+             id=role_id,
+             name=name,
+             description=description,
+             permissions=permissions
+         )
+
+
+ @dataclass
+ class User:
+     """User model with authentication and role information."""
+     id: str
+     username: str
+     email: str
+     full_name: Optional[str] = None
+     is_active: bool = True
+     is_staff: bool = False
+     is_superuser: bool = False
+     created_at: Optional[datetime] = None
+     last_login: Optional[datetime] = None
+     roles: List[Role] = field(default_factory=list)
+
+     @classmethod
+     def from_dict(cls, data: Dict[str, Any]) -> 'User':
+         """Create a User object from a dictionary."""
+         user_id = data.get('id')
+         username = data.get('username')
+         email = data.get('email')
+         full_name = data.get('full_name')
+         is_active = data.get('is_active', True)
+         is_staff = data.get('is_staff', False)
+         is_superuser = data.get('is_superuser', False)
+
+         created_at = parse_datetime(data.get('created_at'))
+         last_login = parse_datetime(data.get('last_login'))
+
+         roles = [
+             Role.from_dict(role_data)
+             for role_data in data.get('roles', [])
+         ]
+
+         return cls(
+             id=user_id,
+             username=username,
+             email=email,
+             full_name=full_name,
+             is_active=is_active,
+             is_staff=is_staff,
+             is_superuser=is_superuser,
+             created_at=created_at,
+             last_login=last_login,
+             roles=roles
+         )
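
The nested from_dict constructors compose: a single user payload can carry roles, and each role its permissions, deserialized in one top-level call. A short sketch under the same assumptions as above, with an invented payload:

    from adss.models.user import User

    payload = {
        'id': 'u-1',
        'username': 'astro',
        'email': 'astro@example.org',
        'roles': [{
            'id': 1,
            'name': 'reader',
            'permissions': {
                'schema_permissions': [{'schema_name': 'catalog', 'permission': 'read'}],
                'table_permissions': [],
            },
        }],
    }
    user = User.from_dict(payload)
    assert user.roles[0].permissions.schema_permissions[0].permission == 'read'
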
adss/table.py ADDED
@@ -0,0 +1,295 @@
+ from adss.executors.sync_query import execute_sync
+ from adss.executors.async_query import execute_async
+ from adss.utils import format_table
+
+ import re
+
+ class Table:
+     def __init__(self, name, columns):
+         self.name = name
+         self.columns = columns
+         self.selected_columns = []
+         self.constrains = []
+
+     def __repr__(self):
+         return f"Table(name={self.name}, columns={len(self.columns)})"
+
+     def __str__(self):
+         return f"Table: {self.name} ({len(self.columns)} columns)"
+
+     def check_column(self, column):
+         return column in self.columns
+
+     def format_columns(self, columns):
+         # Use the provided columns list rather than an undefined variable
+         return ','.join(columns)
+
+     def set_columns(self, columns):
+         if not isinstance(columns, list):
+             columns = [columns]
+         for column in columns:
+             if not self.check_column(column):
+                 raise ValueError(f"Column {column} not in table {self.name}; options are {self.columns}")
+         self.selected_columns = columns
+
+     def set_constrains(self, constrains):
+         self.constrains = constrains
+
+     def cone_search(self, ra, dec, radius_arcsec, columns=None, method='sync'):
+         if radius_arcsec < 0:
+             raise ValueError("Radius must be positive")
+         if radius_arcsec > 60:
+             raise ValueError("Radius must be less than 60 arcsec")
+
+         if columns:
+             columns_str = self.format_columns(columns)
+         elif self.selected_columns:
+             columns_str = self.format_columns(self.selected_columns)
+         else:
+             columns_str = "*"  # Select all columns
+
+         constraints_str = ""
+         if self.constrains:  # may be a single string or a list of strings
+             constraints_str = " AND (" + (self.constrains if isinstance(self.constrains, str) else " AND ".join(self.constrains)) + ")"
+
+         query = f"""SELECT {columns_str} FROM {self.name}
+         WHERE 1 = CONTAINS(
+             POINT('ICRS', ra, dec),
+             CIRCLE('ICRS', {ra}, {dec}, {radius_arcsec}/3600.0)
+         ){constraints_str}
+         """
+
+         print(query)
+         if method == 'sync':
+             return execute_sync(query)
+         else:
+             return execute_async(query)
+
+     def cone_cross_match(
+         self,
+         other_table,
+         match_arcsec,
+         ra,
+         dec,
+         radius_arcsec,
+         columns=None,
+         other_columns=None,
+         other_suffix=None,
+         method='sync'
+     ):
+         """
+         Perform a cone search on the current table (t1) and then cross-match with another table (t2)
+         using a matching radius (match_arcsec).
+
+         The query first restricts table t1 to a cone centered at (ra, dec) with a radius of radius_arcsec.
+         Then, for each object in t1, it finds matching objects in table t2 that lie within match_arcsec
+         of the t1 object's coordinates.
+
+         Additionally:
+         - If a non-empty `other_suffix` is provided, each selected column from t2 will be aliased with that suffix.
+         - The constraints for each table are processed so that the columns in the conditions are properly qualified with t1 or t2.
+
+         Parameters:
+             other_table (Table): The table to match against (t2).
+             match_arcsec (float): The cross-match tolerance radius (in arcseconds) between t1 and t2.
+             ra (float): Right Ascension of the cone center (t1).
+             dec (float): Declination of the cone center (t1).
+             radius_arcsec (float): The cone search radius (in arcseconds) for filtering t1.
+             columns (list or None): Columns to select from the current table (t1).
+             other_columns (list or None): Columns to select from the other table (t2).
+             other_suffix (str or None): Optional suffix to append to each t2 column alias.
+             method (str): Use 'sync' for synchronous execution or 'async' for asynchronous.
+
+         Returns:
+             The result of the query execution via execute_sync or execute_async.
+         """
+
+         # Helper function to qualify constraint column names with the proper alias.
+         # It looks for each column name as a whole word (not already preceded by an alias) and prefixes it.
+         def apply_alias_to_constraint(constraint, alias, columns_list):
+             for col in columns_list:
+                 # (?<![\w\.]) ensures that we do not match if the column is already prefixed (like t1.ra)
+                 pattern = r'(?<![\w\.])\b' + re.escape(col) + r'\b'
+                 constraint = re.sub(pattern, f"{alias}.{col}", constraint)
+             return constraint
+
+         # Validate match_arcsec
+         if match_arcsec <= 0:
+             raise ValueError("Match radius must be positive")
+         if match_arcsec > 3:
+             print("Match radius may be too large; consider a value less than 3 arcsec")
+
+         # Determine columns for t1
+         if columns:
+             t1_columns_list = columns if isinstance(columns, list) else [columns]
+             t1_columns = ', '.join(f"t1.{col}" for col in t1_columns_list)
+         elif self.selected_columns:
+             t1_columns = ', '.join(f"t1.{col}" for col in self.selected_columns)
+         else:
+             t1_columns = "t1.*"
+
+         # Determine columns for t2, adding the suffix if provided
+         if other_columns:
+             t2_columns_list = other_columns if isinstance(other_columns, list) else [other_columns]
+             if other_suffix:
+                 t2_columns = ', '.join(f"t2.{col} AS {col}{other_suffix}" for col in t2_columns_list)
+             else:
+                 t2_columns = ', '.join(f"t2.{col}" for col in t2_columns_list)
+         elif other_table.selected_columns:
+             if other_suffix:
+                 t2_columns = ', '.join(f"t2.{col} AS {col}{other_suffix}" for col in other_table.selected_columns)
+             else:
+                 t2_columns = ', '.join(f"t2.{col}" for col in other_table.selected_columns)
+         else:
+             t2_columns = "t2.*"
+
+         # Process constraints for t1: apply alias "t1" to each column mentioned in the constraint.
+         constraints_t1 = ""
+         if self.constrains:
+             if isinstance(self.constrains, str):
+                 processed_constraint = apply_alias_to_constraint(self.constrains, "t1", self.columns)
+                 constraints_t1 = " AND (" + processed_constraint + ")"
+             elif isinstance(self.constrains, list):
+                 processed_constraints = []
+                 for c in self.constrains:
+                     processed_constraints.append(apply_alias_to_constraint(c, "t1", self.columns))
+                 constraints_t1 = " AND (" + " AND ".join(processed_constraints) + ")"
+
+         # Process constraints for t2: apply alias "t2" to each column mentioned in the constraint.
+         constraints_t2 = ""
+         if other_table.constrains:
+             if isinstance(other_table.constrains, str):
+                 processed_constraint = apply_alias_to_constraint(other_table.constrains, "t2", other_table.columns)
+                 constraints_t2 = " AND (" + processed_constraint + ")"
+             elif isinstance(other_table.constrains, list):
+                 processed_constraints = []
+                 for c in other_table.constrains:
+                     processed_constraints.append(apply_alias_to_constraint(c, "t2", other_table.columns))
+                 constraints_t2 = " AND (" + " AND ".join(processed_constraints) + ")"
+         # Build the query:
+         # 1. The first CONTAINS clause performs the cross-match between t1 and t2 with the match_arcsec tolerance.
+         # 2. The second CONTAINS clause restricts t1 objects to the cone centered at (ra, dec) with radius radius_arcsec.
+         query = f"""SELECT {t1_columns}, {t2_columns}
+         FROM {self.name} AS t1, {other_table.name} AS t2
+         WHERE 1 = CONTAINS(
+             POINT('ICRS', t2.ra, t2.dec),
+             CIRCLE('ICRS', t1.ra, t1.dec, {match_arcsec}/3600.0)
+         )
+         AND 1 = CONTAINS(
+             POINT('ICRS', t1.ra, t1.dec),
+             CIRCLE('ICRS', {ra}, {dec}, {radius_arcsec}/3600.0)
+         )
+         {constraints_t1}
+         {constraints_t2}
+         """
+
+         print(query)
+         if method == 'async':
+             return execute_async(query)
+         else:
+             return execute_sync(query)
+
+     def table_cross_match(
+         self,
+         other_table,
+         match_arcsec,
+         columns=None,
+         other_columns=None,
+         other_suffix=None,
+         method='async'
+     ):
+         """
+         Cross-match the current table (t1) with a user-supplied table
+         (a DataFrame or astropy Table, t2), uploaded alongside the query,
+         using a matching radius (match_arcsec).
+
+         For each object in t1, the query finds matching objects in t2 that lie within match_arcsec
+         of the t1 object's coordinates.
+
+         Additionally:
+         - If a non-empty `other_suffix` is provided, each selected column from t2 will be aliased with that suffix.
+         - The constraints for t1 are processed so that the columns in the conditions are properly qualified.
+
+         Parameters:
+             other_table (astropy.table.Table or pandas.DataFrame): The table to match against (t2).
+             match_arcsec (float): The cross-match tolerance radius (in arcseconds) between t1 and t2.
+             columns (list or None): Columns to select from the current table (t1).
+             other_columns (list or None): Columns to select from the uploaded table (t2).
+             other_suffix (str or None): Optional suffix to append to each t2 column alias.
+             method (str): Only 'async' is currently supported for table cross-match.
+
+         Returns:
+             The result of the query execution via execute_async.
+         """
+
+         # Helper function to qualify constraint column names with the proper alias.
+         # It looks for each column name as a whole word (not already preceded by an alias) and prefixes it.
+         def apply_alias_to_constraint(constraint, alias, columns_list):
+             for col in columns_list:
+                 # (?<![\w\.]) ensures that we do not match if the column is already prefixed (like t1.ra)
+                 pattern = r'(?<![\w\.])\b' + re.escape(col) + r'\b'
+                 constraint = re.sub(pattern, f"{alias}.{col}", constraint)
+             return constraint
+
+         # Validate match_arcsec
+         if match_arcsec <= 0:
+             raise ValueError("Match radius must be positive")
+         if match_arcsec > 3:
+             print("Match radius may be too large; consider a value less than 3 arcsec")
+
+         # Determine columns for t1
+         if columns:
+             t1_columns_list = columns if isinstance(columns, list) else [columns]
+             t1_columns = ', '.join(f"t1.{col}" for col in t1_columns_list)
+         elif self.selected_columns:
+             t1_columns = ', '.join(f"t1.{col}" for col in self.selected_columns)
+         else:
+             t1_columns = "t1.*"
+
+         # Determine columns for t2, adding the suffix if provided
+         if not other_columns:
+             raise ValueError("Must provide columns for the input table (other_columns param)")
+
+         if "ra" not in other_columns or "dec" not in other_columns:
+             raise ValueError("Input table must have 'ra' and 'dec' columns")
+
+         other_table = other_table[other_columns]
+
+         t2_columns_list = other_columns if isinstance(other_columns, list) else [other_columns]
+         if other_suffix:
+             t2_columns = ', '.join(f"t2.{col} AS {col}{other_suffix}" for col in t2_columns_list)
+         else:
+             t2_columns = ', '.join(f"t2.{col}" for col in t2_columns_list)
+
+         # Process constraints for t1: apply alias "t1" to each column mentioned in the constraint.
+         constraints_t1 = ""
+         if self.constrains:
+             if isinstance(self.constrains, str):
+                 processed_constraint = apply_alias_to_constraint(self.constrains, "t1", self.columns)
+                 constraints_t1 = " (" + processed_constraint + ")"
+             elif isinstance(self.constrains, list):
+                 processed_constraints = []
+                 for c in self.constrains:
+                     processed_constraints.append(apply_alias_to_constraint(c, "t1", self.columns))
+                 constraints_t1 = " (" + " AND ".join(processed_constraints) + ")"
+
+         if constraints_t1:
+             constraints_t1 = "WHERE " + constraints_t1
+         # Build the query: a single CONTAINS clause joins t1 against the uploaded
+         # table (exposed to ADQL as tap_upload.upload, aliased t2) with the
+         # match_arcsec tolerance; any t1 constraints become the WHERE clause.
+         query = f"""SELECT {t1_columns}, {t2_columns}
+         FROM {self.name} AS t1 JOIN tap_upload.upload AS t2 ON
+         1 = CONTAINS(
+             POINT('ICRS', t1.ra, t1.dec),
+             CIRCLE('ICRS', t2.ra, t2.dec, {match_arcsec}/3600.0)
+         )
+         {constraints_t1}
+         """
+
+         print(query)
+         if method == 'async':
+             return execute_async(query, table_upload=other_table)
+         else:
+             raise ValueError("Synchronous execution is not yet supported for table cross-match")
+         #return execute_sync(query)
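
To make the query building concrete: a Table constructed by hand produces ADQL like the following before submitting it through execute_sync. Table name, columns, and constraint are invented for illustration:

    t = Table('catalog.sources', ['id', 'ra', 'dec', 'mag_r'])
    t.set_columns(['id', 'ra', 'dec'])
    t.set_constrains("mag_r < 21")
    t.cone_search(150.1, 2.2, 30)  # prints and submits roughly the ADQL below

    # SELECT id,ra,dec FROM catalog.sources
    # WHERE 1 = CONTAINS(
    #     POINT('ICRS', ra, dec),
    #     CIRCLE('ICRS', 150.1, 2.2, 30/3600.0)
    # ) AND (mag_r < 21)

The aliasing helper used by both cross-match methods rewrites bare column names with a table alias; its effect can be checked in isolation with the same regex:

    import re

    constraint = "mag_r < 21 AND ra > 150"
    for col in ['id', 'ra', 'dec', 'mag_r']:
        constraint = re.sub(r'(?<![\w\.])\b' + re.escape(col) + r'\b', f"t1.{col}", constraint)
    print(constraint)  # -> t1.mag_r < 21 AND t1.ra > 150
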
adss/utils/__init__.py ADDED
@@ -0,0 +1,115 @@
+ import numpy as np
+ from astropy.table import Table
+
+ def vectorized_string_to_masked_array(column_data):
+     """
+     Fully vectorized conversion of formatted string arrays to NumPy masked arrays.
+     The strings are assumed to be wrapped in curly braces (e.g. "{1,2,3}").
+     Any occurrence of the literal "NULL" in a cell will be masked.
+
+     Parameters
+     ----------
+     column_data : numpy.ndarray
+         A 1D NumPy array of strings. Each element is a formatted array like "{1,2,3}".
+
+     Returns
+     -------
+     numpy.ma.MaskedArray
+         A masked array where "NULL" entries are masked.
+     """
+     # Remove curly braces (but do not remove "NULL")
+     clean_data = np.char.replace(column_data.astype(str), "{", "")
+     clean_data = np.char.replace(clean_data, "}", "")
+
+     # Split each string by comma into a list of items (with possible surrounding whitespace)
+     split_arrays = np.char.split(clean_data, ",")
+
+     # --- Determine the type by scanning for the first non-"NULL" value ---
+     first_value = None
+     for row in split_arrays:
+         for item in row:
+             item_str = item.strip()
+             if item_str != "NULL":
+                 first_value = item_str
+                 break
+         if first_value is not None:
+             break
+
+     # If no non-NULL value is found, default to a fully masked object array.
+     if first_value is None:
+         data = [np.array(row) for row in split_arrays]
+         mask = [np.full(len(row), True, dtype=bool) for row in split_arrays]
+         return np.ma.masked_array(data, mask=mask)
+
+     # Try to determine the numeric type:
+     # if first_value consists solely of digits, assume integer;
+     # otherwise, if it can be converted to float, use float;
+     # else, default to string.
+     is_integer = first_value.isdigit()
+     is_float = False
+     if not is_integer:
+         try:
+             float(first_value)
+             is_float = True
+         except Exception:
+             pass
+
+     # Prepare lists to store converted rows and the corresponding masks.
+     data_list = []
+     mask_list = []
+
+     # Conversion helper function
+     def convert_item(item, conv):
+         item = item.strip()
+         if item == "NULL":
+             return None, True
+         else:
+             return conv(item), False
+
+     if is_integer:
+         conv_func = int
+         dtype = np.int64
+     elif is_float:
+         conv_func = float
+         dtype = np.float64
+     else:
+         conv_func = lambda x: x
+         dtype = object
+
+     # Process each row
+     for row in split_arrays:
+         row_vals = []
+         row_mask = []
+         for item in row:
+             val, is_mask = convert_item(item, conv_func)
+             # For masked numeric values, insert a dummy (0 or 0.0) value.
+             if is_mask:
+                 if dtype in (np.int64, np.float64):
+                     row_vals.append(0)
+                 else:
+                     row_vals.append("")
+             else:
+                 row_vals.append(val)
+             row_mask.append(is_mask)
+         # Convert the row to an array of the target dtype.
+         row_arr = np.array(row_vals, dtype=dtype)
+         data_list.append(row_arr)
+         mask_list.append(np.array(row_mask, dtype=bool))
+
+     # Create and return the masked array.
+     return np.ma.masked_array(data_list, mask=mask_list)
+
+ def format_result_table(tab):
+     if tab is None or len(tab) == 0:
+         return None
+
+     for col in tab.colnames:
+         if len(tab[col]) == 0:
+             continue
+         if "<U" not in str(tab[col].dtype):
+             continue
+
+         if "{" in tab[col][0]:
+             tab[col] = vectorized_string_to_masked_array(tab[col])
+
+     return tab
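
A quick check of the conversion helper on a toy column (values invented; the function is assumed to be in scope, since its exact module path under adss.utils is not shown in this diff):

    import numpy as np

    arr = np.array(["{1,2,3}", "{4,NULL,6}"])
    m = vectorized_string_to_masked_array(arr)
    print(m[1])       # [4 -- 6]: the NULL entry is masked
    print(m.mask[1])  # [False  True False]
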