robosystems-client 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of robosystems-client might be problematic. Click here for more details.
- robosystems_client/api/query/execute_cypher_query.py +0 -5
- robosystems_client/api/tables/delete_file.py +437 -0
- robosystems_client/api/tables/get_file_info.py +397 -0
- robosystems_client/api/tables/get_upload_url.py +548 -0
- robosystems_client/api/tables/ingest_tables.py +616 -0
- robosystems_client/api/tables/list_table_files.py +509 -0
- robosystems_client/api/tables/list_tables.py +488 -0
- robosystems_client/api/tables/query_tables.py +487 -0
- robosystems_client/api/tables/update_file_status.py +539 -0
- robosystems_client/extensions/graph_client.py +5 -0
- robosystems_client/extensions/table_ingest_client.py +31 -40
- robosystems_client/models/__init__.py +13 -17
- robosystems_client/models/create_graph_request.py +11 -0
- robosystems_client/models/{delete_file_v1_graphs_graph_id_tables_files_file_id_delete_response_delete_file_v1_graphs_graph_id_tables_files_file_id_delete.py → delete_file_response.py} +45 -9
- robosystems_client/models/file_info.py +169 -0
- robosystems_client/models/file_status_update.py +41 -0
- robosystems_client/models/get_file_info_response.py +205 -0
- robosystems_client/models/list_table_files_response.py +105 -0
- robosystems_client/models/{get_file_info_v1_graphs_graph_id_tables_files_file_id_get_response_get_file_info_v1_graphs_graph_id_tables_files_file_id_get.py → update_file_status_response_updatefilestatus.py} +5 -8
- {robosystems_client-0.2.2.dist-info → robosystems_client-0.2.3.dist-info}/METADATA +1 -1
- {robosystems_client-0.2.2.dist-info → robosystems_client-0.2.3.dist-info}/RECORD +23 -22
- robosystems_client/api/tables/delete_file_v1_graphs_graph_id_tables_files_file_id_delete.py +0 -287
- robosystems_client/api/tables/get_file_info_v1_graphs_graph_id_tables_files_file_id_get.py +0 -283
- robosystems_client/api/tables/get_upload_url_v1_graphs_graph_id_tables_table_name_files_post.py +0 -260
- robosystems_client/api/tables/ingest_tables_v1_graphs_graph_id_tables_ingest_post.py +0 -251
- robosystems_client/api/tables/list_table_files_v1_graphs_graph_id_tables_table_name_files_get.py +0 -283
- robosystems_client/api/tables/list_tables_v1_graphs_graph_id_tables_get.py +0 -224
- robosystems_client/api/tables/query_tables_v1_graphs_graph_id_tables_query_post.py +0 -247
- robosystems_client/api/tables/update_file_v1_graphs_graph_id_tables_files_file_id_patch.py +0 -306
- robosystems_client/models/file_update_request.py +0 -62
- robosystems_client/models/list_table_files_v1_graphs_graph_id_tables_table_name_files_get_response_list_table_files_v1_graphs_graph_id_tables_table_name_files_get.py +0 -47
- robosystems_client/models/update_file_v1_graphs_graph_id_tables_files_file_id_patch_response_update_file_v1_graphs_graph_id_tables_files_file_id_patch.py +0 -47
- {robosystems_client-0.2.2.dist-info → robosystems_client-0.2.3.dist-info}/WHEEL +0 -0
- {robosystems_client-0.2.2.dist-info → robosystems_client-0.2.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,487 @@
|
|
|
1
|
+
from http import HTTPStatus
|
|
2
|
+
from typing import Any, Optional, Union, cast
|
|
3
|
+
|
|
4
|
+
import httpx
|
|
5
|
+
|
|
6
|
+
from ... import errors
|
|
7
|
+
from ...client import AuthenticatedClient, Client
|
|
8
|
+
from ...models.error_response import ErrorResponse
|
|
9
|
+
from ...models.http_validation_error import HTTPValidationError
|
|
10
|
+
from ...models.table_query_request import TableQueryRequest
|
|
11
|
+
from ...models.table_query_response import TableQueryResponse
|
|
12
|
+
from ...types import UNSET, Response, Unset
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _get_kwargs(
    graph_id: str,
    *,
    body: TableQueryRequest,
    token: Union[None, Unset, str] = UNSET,
    authorization: Union[None, Unset, str] = UNSET,
) -> dict[str, Any]:
    """Assemble the keyword arguments for the staging-table query request.

    The result is meant to be splatted into an httpx client's ``request``
    call. It describes a ``POST`` to ``/v1/graphs/{graph_id}/tables/query``
    with ``body.to_dict()`` as the JSON payload.

    Args:
        graph_id: Graph identifier interpolated into the URL path.
        body: Request model; serialized with ``body.to_dict()``.
        token: Optional value for the ``token`` query parameter. Omitted
            from the request when it is unset or ``None``.
        authorization: Optional value for the ``authorization`` header.
            Omitted from the request when it is unset or ``None``.

    Returns:
        A dict with the keys ``method``, ``url``, ``params``, ``json`` and
        ``headers``.
    """
    headers: dict[str, Any] = {}
    # The signature allows an explicit None. A None header value cannot be
    # encoded by the HTTP layer, so treat it like "unset" -- the same way
    # the ``token`` query parameter below already drops None.
    if not isinstance(authorization, Unset) and authorization is not None:
        headers["authorization"] = authorization

    params: dict[str, Any] = {}
    if not isinstance(token, Unset) and token is not None:
        params["token"] = token

    # Content-Type is added after the optional authorization header so the
    # header insertion order matches the previous implementation.
    headers["Content-Type"] = "application/json"

    return {
        "method": "post",
        "url": f"/v1/graphs/{graph_id}/tables/query",
        "params": params,
        "json": body.to_dict(),
        "headers": headers,
    }
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _parse_response(
    *, client: Union[AuthenticatedClient, Client], response: httpx.Response
) -> Optional[Union[Any, ErrorResponse, HTTPValidationError, TableQueryResponse]]:
    """Convert an HTTP response into the model matching its status code.

    Status handling:
        * 200 -> ``TableQueryResponse`` built from the JSON body.
        * 400, 403, 404 -> ``ErrorResponse`` built from the JSON body.
        * 422 -> ``HTTPValidationError`` built from the JSON body.
        * 401, 408, 500 -> ``None`` (the body is not parsed).
        * anything else -> raises ``errors.UnexpectedStatus`` when
          ``client.raise_on_unexpected_status`` is truthy, otherwise ``None``.

    Args:
        client: Client whose ``raise_on_unexpected_status`` flag controls
            the handling of undocumented status codes.
        response: The raw httpx response to interpret.

    Returns:
        The parsed model, or ``None`` as described above.

    Raises:
        errors.UnexpectedStatus: For an undocumented status code when the
            client is configured to raise.
    """
    status = response.status_code

    if status == 200:
        return TableQueryResponse.from_dict(response.json())

    if status in (400, 403, 404):
        return ErrorResponse.from_dict(response.json())

    if status == 422:
        return HTTPValidationError.from_dict(response.json())

    if status in (401, 408, 500):
        # Documented statuses for which no response model is built.
        return cast(Any, None)

    if client.raise_on_unexpected_status:
        raise errors.UnexpectedStatus(status, response.content)
    return None
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _build_response(
    *, client: Union[AuthenticatedClient, Client], response: httpx.Response
) -> Response[Union[Any, ErrorResponse, HTTPValidationError, TableQueryResponse]]:
    """Wrap a raw httpx response in the client's ``Response`` container.

    Args:
        client: Client forwarded to ``_parse_response``.
        response: The raw httpx response.

    Returns:
        A ``Response`` carrying the status code (as ``HTTPStatus``), the raw
        content, the headers, and the parsed payload.
    """
    # Convert the status first, then parse: this keeps the same evaluation
    # order as building all four arguments inline.
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    response_headers = response.headers
    parsed_payload = _parse_response(client=client, response=response)

    return Response(
        status_code=status,
        content=raw_content,
        headers=response_headers,
        parsed=parsed_payload,
    )
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def sync_detailed(
    graph_id: str,
    *,
    client: AuthenticatedClient,
    body: TableQueryRequest,
    token: Union[None, Unset, str] = UNSET,
    authorization: Union[None, Unset, str] = UNSET,
) -> Response[Union[Any, ErrorResponse, HTTPValidationError, TableQueryResponse]]:
    """Query Staging Tables with SQL (synchronous, full response).

    Execute SQL queries on DuckDB staging tables for data inspection and
    validation. Sends ``POST /v1/graphs/{graph_id}/tables/query`` through the
    client's synchronous httpx client.

    **Purpose:**
    Query raw staging data directly with SQL before it is ingested into the
    graph database. Useful for data quality checks, validation, and
    exploratory analysis.

    **Use Cases:**
    - Validate data quality before graph ingestion
    - Inspect row-level data for debugging
    - Run analytics on staging tables
    - Check for duplicates, nulls, or data issues
    - Preview data transformations

    **Workflow:**
    1. Upload data files via `POST /tables/{table_name}/files`
    2. Query staging tables to validate: `POST /tables/query`
    3. Fix any data issues by re-uploading
    4. Ingest validated data: `POST /tables/ingest`

    **Supported SQL:**
    - Full DuckDB SQL syntax
    - SELECT, JOIN, WHERE, GROUP BY, ORDER BY
    - Aggregations, window functions, CTEs
    - Multiple table joins across staging area

    **Example Queries:**
    ```sql
    -- Count rows in staging table
    SELECT COUNT(*) FROM Entity;

    -- Check for nulls
    SELECT * FROM Entity WHERE name IS NULL LIMIT 10;

    -- Find duplicates
    SELECT identifier, COUNT(*) as cnt
    FROM Entity
    GROUP BY identifier
    HAVING COUNT(*) > 1;

    -- Join across tables
    SELECT e.name, COUNT(t.id) as transaction_count
    FROM Entity e
    LEFT JOIN Transaction t ON e.identifier = t.entity_id
    GROUP BY e.name
    ORDER BY transaction_count DESC;
    ```

    **Limits:**
    - Query timeout: 30 seconds
    - Result limit: 10,000 rows (use LIMIT clause)
    - Read-only: No INSERT, UPDATE, DELETE
    - User's tables only: Cannot query other users' data

    **Shared Repositories:**
    Shared repositories (SEC, etc.) do not allow direct SQL queries.
    Use the graph query endpoint instead: `POST /v1/graphs/{graph_id}/query`

    **Note:**
    Staging table queries are included - no credit consumption.

    Args:
        graph_id (str): Graph database identifier
        client (AuthenticatedClient): Client used to send the request.
        body (TableQueryRequest): Request payload, sent as JSON.
        token (Union[None, Unset, str]): JWT token for SSE authentication
        authorization (Union[None, Unset, str]): Value for the
            ``authorization`` request header.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status
            code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than
            Client.timeout.

    Returns:
        Response[Union[Any, ErrorResponse, HTTPValidationError, TableQueryResponse]]
    """
    request_kwargs = _get_kwargs(
        graph_id=graph_id,
        body=body,
        token=token,
        authorization=authorization,
    )

    http_client = client.get_httpx_client()
    raw_response = http_client.request(**request_kwargs)

    return _build_response(client=client, response=raw_response)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def sync(
    graph_id: str,
    *,
    client: AuthenticatedClient,
    body: TableQueryRequest,
    token: Union[None, Unset, str] = UNSET,
    authorization: Union[None, Unset, str] = UNSET,
) -> Optional[Union[Any, ErrorResponse, HTTPValidationError, TableQueryResponse]]:
    """Query Staging Tables with SQL (synchronous, parsed payload only).

    Execute SQL queries on DuckDB staging tables for data inspection and
    validation. Delegates to ``sync_detailed`` and returns only the
    ``parsed`` attribute of its result.

    **Purpose:**
    Query raw staging data directly with SQL before it is ingested into the
    graph database. Useful for data quality checks, validation, and
    exploratory analysis.

    **Use Cases:**
    - Validate data quality before graph ingestion
    - Inspect row-level data for debugging
    - Run analytics on staging tables
    - Check for duplicates, nulls, or data issues
    - Preview data transformations

    **Workflow:**
    1. Upload data files via `POST /tables/{table_name}/files`
    2. Query staging tables to validate: `POST /tables/query`
    3. Fix any data issues by re-uploading
    4. Ingest validated data: `POST /tables/ingest`

    **Supported SQL:**
    - Full DuckDB SQL syntax
    - SELECT, JOIN, WHERE, GROUP BY, ORDER BY
    - Aggregations, window functions, CTEs
    - Multiple table joins across staging area

    **Example Queries:**
    ```sql
    -- Count rows in staging table
    SELECT COUNT(*) FROM Entity;

    -- Check for nulls
    SELECT * FROM Entity WHERE name IS NULL LIMIT 10;

    -- Find duplicates
    SELECT identifier, COUNT(*) as cnt
    FROM Entity
    GROUP BY identifier
    HAVING COUNT(*) > 1;

    -- Join across tables
    SELECT e.name, COUNT(t.id) as transaction_count
    FROM Entity e
    LEFT JOIN Transaction t ON e.identifier = t.entity_id
    GROUP BY e.name
    ORDER BY transaction_count DESC;
    ```

    **Limits:**
    - Query timeout: 30 seconds
    - Result limit: 10,000 rows (use LIMIT clause)
    - Read-only: No INSERT, UPDATE, DELETE
    - User's tables only: Cannot query other users' data

    **Shared Repositories:**
    Shared repositories (SEC, etc.) do not allow direct SQL queries.
    Use the graph query endpoint instead: `POST /v1/graphs/{graph_id}/query`

    **Note:**
    Staging table queries are included - no credit consumption.

    Args:
        graph_id (str): Graph database identifier
        client (AuthenticatedClient): Client used to send the request.
        body (TableQueryRequest): Request payload, sent as JSON.
        token (Union[None, Unset, str]): JWT token for SSE authentication
        authorization (Union[None, Unset, str]): Value for the
            ``authorization`` request header.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status
            code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than
            Client.timeout.

    Returns:
        Union[Any, ErrorResponse, HTTPValidationError, TableQueryResponse]
    """
    detailed = sync_detailed(
        graph_id=graph_id,
        client=client,
        body=body,
        token=token,
        authorization=authorization,
    )
    return detailed.parsed
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
async def asyncio_detailed(
    graph_id: str,
    *,
    client: AuthenticatedClient,
    body: TableQueryRequest,
    token: Union[None, Unset, str] = UNSET,
    authorization: Union[None, Unset, str] = UNSET,
) -> Response[Union[Any, ErrorResponse, HTTPValidationError, TableQueryResponse]]:
    """Query Staging Tables with SQL (asynchronous, full response).

    Execute SQL queries on DuckDB staging tables for data inspection and
    validation. Sends ``POST /v1/graphs/{graph_id}/tables/query`` through the
    client's asynchronous httpx client and awaits the response.

    **Purpose:**
    Query raw staging data directly with SQL before it is ingested into the
    graph database. Useful for data quality checks, validation, and
    exploratory analysis.

    **Use Cases:**
    - Validate data quality before graph ingestion
    - Inspect row-level data for debugging
    - Run analytics on staging tables
    - Check for duplicates, nulls, or data issues
    - Preview data transformations

    **Workflow:**
    1. Upload data files via `POST /tables/{table_name}/files`
    2. Query staging tables to validate: `POST /tables/query`
    3. Fix any data issues by re-uploading
    4. Ingest validated data: `POST /tables/ingest`

    **Supported SQL:**
    - Full DuckDB SQL syntax
    - SELECT, JOIN, WHERE, GROUP BY, ORDER BY
    - Aggregations, window functions, CTEs
    - Multiple table joins across staging area

    **Example Queries:**
    ```sql
    -- Count rows in staging table
    SELECT COUNT(*) FROM Entity;

    -- Check for nulls
    SELECT * FROM Entity WHERE name IS NULL LIMIT 10;

    -- Find duplicates
    SELECT identifier, COUNT(*) as cnt
    FROM Entity
    GROUP BY identifier
    HAVING COUNT(*) > 1;

    -- Join across tables
    SELECT e.name, COUNT(t.id) as transaction_count
    FROM Entity e
    LEFT JOIN Transaction t ON e.identifier = t.entity_id
    GROUP BY e.name
    ORDER BY transaction_count DESC;
    ```

    **Limits:**
    - Query timeout: 30 seconds
    - Result limit: 10,000 rows (use LIMIT clause)
    - Read-only: No INSERT, UPDATE, DELETE
    - User's tables only: Cannot query other users' data

    **Shared Repositories:**
    Shared repositories (SEC, etc.) do not allow direct SQL queries.
    Use the graph query endpoint instead: `POST /v1/graphs/{graph_id}/query`

    **Note:**
    Staging table queries are included - no credit consumption.

    Args:
        graph_id (str): Graph database identifier
        client (AuthenticatedClient): Client used to send the request.
        body (TableQueryRequest): Request payload, sent as JSON.
        token (Union[None, Unset, str]): JWT token for SSE authentication
        authorization (Union[None, Unset, str]): Value for the
            ``authorization`` request header.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status
            code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than
            Client.timeout.

    Returns:
        Response[Union[Any, ErrorResponse, HTTPValidationError, TableQueryResponse]]
    """
    request_kwargs = _get_kwargs(
        graph_id=graph_id,
        body=body,
        token=token,
        authorization=authorization,
    )

    async_http_client = client.get_async_httpx_client()
    raw_response = await async_http_client.request(**request_kwargs)

    return _build_response(client=client, response=raw_response)
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
async def asyncio(
    graph_id: str,
    *,
    client: AuthenticatedClient,
    body: TableQueryRequest,
    token: Union[None, Unset, str] = UNSET,
    authorization: Union[None, Unset, str] = UNSET,
) -> Optional[Union[Any, ErrorResponse, HTTPValidationError, TableQueryResponse]]:
    """Query Staging Tables with SQL (asynchronous, parsed payload only).

    Execute SQL queries on DuckDB staging tables for data inspection and
    validation. Awaits ``asyncio_detailed`` and returns only the ``parsed``
    attribute of its result.

    **Purpose:**
    Query raw staging data directly with SQL before it is ingested into the
    graph database. Useful for data quality checks, validation, and
    exploratory analysis.

    **Use Cases:**
    - Validate data quality before graph ingestion
    - Inspect row-level data for debugging
    - Run analytics on staging tables
    - Check for duplicates, nulls, or data issues
    - Preview data transformations

    **Workflow:**
    1. Upload data files via `POST /tables/{table_name}/files`
    2. Query staging tables to validate: `POST /tables/query`
    3. Fix any data issues by re-uploading
    4. Ingest validated data: `POST /tables/ingest`

    **Supported SQL:**
    - Full DuckDB SQL syntax
    - SELECT, JOIN, WHERE, GROUP BY, ORDER BY
    - Aggregations, window functions, CTEs
    - Multiple table joins across staging area

    **Example Queries:**
    ```sql
    -- Count rows in staging table
    SELECT COUNT(*) FROM Entity;

    -- Check for nulls
    SELECT * FROM Entity WHERE name IS NULL LIMIT 10;

    -- Find duplicates
    SELECT identifier, COUNT(*) as cnt
    FROM Entity
    GROUP BY identifier
    HAVING COUNT(*) > 1;

    -- Join across tables
    SELECT e.name, COUNT(t.id) as transaction_count
    FROM Entity e
    LEFT JOIN Transaction t ON e.identifier = t.entity_id
    GROUP BY e.name
    ORDER BY transaction_count DESC;
    ```

    **Limits:**
    - Query timeout: 30 seconds
    - Result limit: 10,000 rows (use LIMIT clause)
    - Read-only: No INSERT, UPDATE, DELETE
    - User's tables only: Cannot query other users' data

    **Shared Repositories:**
    Shared repositories (SEC, etc.) do not allow direct SQL queries.
    Use the graph query endpoint instead: `POST /v1/graphs/{graph_id}/query`

    **Note:**
    Staging table queries are included - no credit consumption.

    Args:
        graph_id (str): Graph database identifier
        client (AuthenticatedClient): Client used to send the request.
        body (TableQueryRequest): Request payload, sent as JSON.
        token (Union[None, Unset, str]): JWT token for SSE authentication
        authorization (Union[None, Unset, str]): Value for the
            ``authorization`` request header.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status
            code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than
            Client.timeout.

    Returns:
        Union[Any, ErrorResponse, HTTPValidationError, TableQueryResponse]
    """
    detailed = await asyncio_detailed(
        graph_id=graph_id,
        client=client,
        body=body,
        token=token,
        authorization=authorization,
    )
    return detailed.parsed
|