robosystems-client 0.2.16__py3-none-any.whl → 0.2.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of robosystems-client might be problematic.
- robosystems_client/extensions/__init__.py +28 -10
- robosystems_client/extensions/extensions.py +12 -3
- robosystems_client/extensions/file_client.py +432 -0
- robosystems_client/extensions/materialization_client.py +237 -0
- robosystems_client/extensions/table_client.py +187 -0
- {robosystems_client-0.2.16.dist-info → robosystems_client-0.2.18.dist-info}/METADATA +1 -1
- {robosystems_client-0.2.16.dist-info → robosystems_client-0.2.18.dist-info}/RECORD +9 -7
- robosystems_client/extensions/table_ingest_client.py +0 -463
- {robosystems_client-0.2.16.dist-info → robosystems_client-0.2.18.dist-info}/WHEEL +0 -0
- {robosystems_client-0.2.16.dist-info → robosystems_client-0.2.18.dist-info}/licenses/LICENSE +0 -0
robosystems_client/extensions/table_ingest_client.py
DELETED
@@ -1,463 +0,0 @@
-"""Table Ingest Client for RoboSystems API
-
-Simplifies uploading Parquet files to staging tables and ingesting them into graphs.
-"""
-
-from dataclasses import dataclass
-from io import BytesIO
-from pathlib import Path
-from typing import Dict, Any, Optional, Callable, List, Union, BinaryIO
-import json
-import logging
-import httpx
-
-from ..api.files.create_file_upload import (
-    sync_detailed as create_file_upload,
-)
-from ..api.files.update_file import (
-    sync_detailed as update_file,
-)
-from ..api.tables.list_tables import (
-    sync_detailed as list_tables,
-)
-from ..api.materialization.materialize_graph import (
-    sync_detailed as materialize_graph,
-)
-from ..models.file_upload_request import FileUploadRequest
-from ..models.file_status_update import FileStatusUpdate
-from ..models.materialize_request import MaterializeRequest
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class UploadOptions:
-    """Options for file upload operations"""
-
-    on_progress: Optional[Callable[[str], None]] = None
-    fix_localstack_url: bool = True  # Auto-fix LocalStack URLs for localhost
-    file_name: Optional[str] = None  # Override file name (useful for buffer uploads)
-
-
-@dataclass
-class IngestOptions:
-    """Options for table ingestion operations"""
-
-    ignore_errors: bool = True
-    rebuild: bool = False
-    on_progress: Optional[Callable[[str], None]] = None
-
-
-@dataclass
-class UploadResult:
-    """Result from file upload operation"""
-
-    file_id: str
-    file_size: int
-    row_count: int
-    table_name: str
-    file_name: str
-    success: bool = True
-    error: Optional[str] = None
-
-
-@dataclass
-class TableInfo:
-    """Information about a staging table"""
-
-    table_name: str
-    row_count: int
-    file_count: int
-    total_size_bytes: int
-
-
-class TableIngestClient:
-    """Enhanced table ingest client with simplified upload workflow"""
-
-    def __init__(self, config: Dict[str, Any]):
-        self.config = config
-        self.base_url = config["base_url"]
-        self.headers = config.get("headers", {})
-        self.token = config.get("token")
-        # Create httpx client for S3 uploads
-        self._http_client = httpx.Client(timeout=120.0)
-
-    def upload_parquet_file(
-        self,
-        graph_id: str,
-        table_name: str,
-        file_or_buffer: Union[Path, str, BytesIO, BinaryIO],
-        options: Optional[UploadOptions] = None,
-    ) -> UploadResult:
-        """
-        Upload a Parquet file to a staging table.
-
-        This method handles the complete 3-step upload process:
-        1. Get presigned upload URL
-        2. Upload file to S3
-        3. Mark file as 'uploaded' (backend validates, calculates size/row count)
-
-        Args:
-            graph_id: The graph ID
-            table_name: Name of the staging table
-            file_or_buffer: Path to the Parquet file or BytesIO/BinaryIO buffer
-            options: Upload options
-
-        Returns:
-            UploadResult with upload details (size/row count calculated by backend)
-        """
-        if options is None:
-            options = UploadOptions()
-
-        # Auto-detect if this is a file path or buffer
-        is_buffer = isinstance(file_or_buffer, (BytesIO, BinaryIO)) or hasattr(
-            file_or_buffer, "read"
-        )
-
-        # Initialize file_path for type checking
-        file_path: Optional[Path] = None
-
-        if is_buffer:
-            # Handle buffer upload
-            file_name = options.file_name or "data.parquet"
-        else:
-            # Handle file path upload
-            file_path = Path(file_or_buffer)
-            file_name = file_path.name
-            if not file_path.exists():
-                return UploadResult(
-                    file_id="",
-                    file_size=0,
-                    row_count=0,
-                    table_name=table_name,
-                    file_name=file_name,
-                    success=False,
-                    error=f"File not found: {file_path}",
-                )
-
-        try:
-            # Import client here to avoid circular imports
-            from ..client import AuthenticatedClient
-
-            # Create authenticated client with X-API-Key
-            # The token is extracted from X-API-Key header in extensions.py
-            if not self.token:
-                return UploadResult(
-                    file_id="",
-                    file_size=0,
-                    row_count=0,
-                    table_name=table_name,
-                    file_name=file_name,
-                    success=False,
-                    error="No API key provided. Set X-API-Key in headers.",
-                )
-
-            client = AuthenticatedClient(
-                base_url=self.base_url,
-                token=self.token,
-                prefix="",  # No prefix for X-API-Key
-                auth_header_name="X-API-Key",  # Use X-API-Key header instead of Authorization
-                headers=self.headers,
-            )
-
-            # Step 1: Get presigned upload URL
-            if options.on_progress:
-                options.on_progress(
-                    f"Getting upload URL for {file_name} -> table '{table_name}'..."
-                )
-
-            upload_request = FileUploadRequest(
-                file_name=file_name, content_type="application/x-parquet", table_name=table_name
-            )
-
-            kwargs = {
-                "graph_id": graph_id,
-                "client": client,
-                "body": upload_request,
-            }
-
-            response = create_file_upload(**kwargs)
-
-            if not response.parsed:
-                error_msg = f"Failed to get upload URL (status: {response.status_code})"
-                if hasattr(response, "content"):
-                    try:
-                        error_detail = json.loads(response.content)
-                        error_msg = f"{error_msg}: {error_detail}"
-                    except (json.JSONDecodeError, ValueError):
-                        error_msg = f"{error_msg}: {response.content[:200]}"
-
-                return UploadResult(
-                    file_id="",
-                    file_size=0,
-                    row_count=0,
-                    table_name=table_name,
-                    file_name=file_name,
-                    success=False,
-                    error=error_msg,
-                )
-
-            upload_url = response.parsed.upload_url
-            file_id = response.parsed.file_id
-
-            # Fix LocalStack URL if needed
-            if options.fix_localstack_url and "localstack:4566" in upload_url:
-                upload_url = upload_url.replace("localstack:4566", "localhost:4566")
-                logger.debug("Fixed LocalStack URL for localhost access")
-
-            # Step 2: Upload file to S3
-            if options.on_progress:
-                options.on_progress(f"Uploading {file_name} to S3...")
-
-            # Read file content - handle both paths and buffers
-            if is_buffer:
-                # Read from buffer
-                if hasattr(file_or_buffer, "getvalue"):
-                    file_content = file_or_buffer.getvalue()
-                else:
-                    # BinaryIO or file-like object
-                    file_or_buffer.seek(0)
-                    file_content = file_or_buffer.read()
-            else:
-                # Read from file path
-                if file_path is None:
-                    raise ValueError("file_path should not be None when not using buffer")
-                with open(file_path, "rb") as f:
-                    file_content = f.read()
-
-            s3_response = self._http_client.put(
-                upload_url,
-                content=file_content,
-                headers={"Content-Type": "application/x-parquet"},
-            )
-            s3_response.raise_for_status()
-
-            # Step 3: Mark file as uploaded (backend validates and calculates size/row count)
-            if options.on_progress:
-                options.on_progress(f"Marking {file_name} as uploaded...")
-
-            status_update = FileStatusUpdate(status="uploaded")
-
-            kwargs = {
-                "graph_id": graph_id,
-                "file_id": file_id,
-                "client": client,
-                "body": status_update,
-            }
-
-            update_response = update_file(**kwargs)
-
-            if not update_response.parsed:
-                logger.error(
-                    f"No parsed response from update_file. Status code: {update_response.status_code}"
-                )
-                return UploadResult(
-                    file_id=file_id,
-                    file_size=0,
-                    row_count=0,
-                    table_name=table_name,
-                    file_name=file_name,
-                    success=False,
-                    error="Failed to complete file upload",
-                )
-
-            response_data = update_response.parsed
-
-            if isinstance(response_data, dict):
-                file_size = response_data.get("file_size_bytes", 0)
-                row_count = response_data.get("row_count", 0)
-            elif hasattr(response_data, "additional_properties"):
-                file_size = response_data.additional_properties.get("file_size_bytes", 0)
-                row_count = response_data.additional_properties.get("row_count", 0)
-            else:
-                file_size = getattr(response_data, "file_size_bytes", 0)
-                row_count = getattr(response_data, "row_count", 0)
-
-            if options.on_progress:
-                options.on_progress(
-                    f"✅ Uploaded {file_name} ({file_size:,} bytes, {row_count:,} rows)"
-                )
-
-            return UploadResult(
-                file_id=file_id,
-                file_size=file_size,
-                row_count=row_count,
-                table_name=table_name,
-                file_name=file_name,
-                success=True,
-            )
-
-        except Exception as e:
-            logger.error(f"Upload failed for {file_name}: {e}")
-            return UploadResult(
-                file_id="",
-                file_size=0,
-                row_count=0,
-                table_name=table_name,
-                file_name=file_name,
-                success=False,
-                error=str(e),
-            )
-
-    def list_staging_tables(self, graph_id: str) -> List[TableInfo]:
-        """
-        List all staging tables in a graph.
-
-        Args:
-            graph_id: The graph ID
-
-        Returns:
-            List of TableInfo objects
-        """
-        try:
-            from ..client import AuthenticatedClient
-
-            if not self.token:
-                logger.error("No API key provided")
-                return []
-
-            client = AuthenticatedClient(
-                base_url=self.base_url,
-                token=self.token,
-                prefix="",
-                auth_header_name="X-API-Key",
-                headers=self.headers,
-            )
-
-            kwargs = {"graph_id": graph_id, "client": client}
-
-            response = list_tables(**kwargs)
-
-            if not response.parsed:
-                logger.error("Failed to list tables")
-                return []
-
-            tables = []
-            for table_data in response.parsed.tables:
-                tables.append(
-                    TableInfo(
-                        table_name=table_data.table_name,
-                        row_count=table_data.row_count,
-                        file_count=table_data.file_count,
-                        total_size_bytes=table_data.total_size_bytes,
-                    )
-                )
-
-            return tables
-
-        except Exception as e:
-            logger.error(f"Failed to list tables: {e}")
-            return []
-
-    def ingest_all_tables(
-        self, graph_id: str, options: Optional[IngestOptions] = None
-    ) -> Dict[str, Any]:
-        """
-        Materialize the graph from all staging tables.
-
-        This rebuilds the complete graph database from the current state of DuckDB staging tables.
-
-        Args:
-            graph_id: The graph ID
-            options: Ingest options
-
-        Returns:
-            Dictionary with materialization results
-        """
-        if options is None:
-            options = IngestOptions()
-
-        try:
-            from ..client import AuthenticatedClient
-
-            if not self.token:
-                return {"success": False, "error": "No API key provided"}
-
-            client = AuthenticatedClient(
-                base_url=self.base_url,
-                token=self.token,
-                prefix="",
-                auth_header_name="X-API-Key",
-                headers=self.headers,
-            )
-
-            if options.on_progress:
-                options.on_progress("Starting table materialization...")
-
-            materialize_request = MaterializeRequest(
-                ignore_errors=options.ignore_errors, rebuild=options.rebuild, force=True
-            )
-
-            kwargs = {
-                "graph_id": graph_id,
-                "client": client,
-                "body": materialize_request,
-            }
-
-            response = materialize_graph(**kwargs)
-
-            if not response.parsed:
-                return {"success": False, "error": "Failed to materialize graph"}
-
-            result = {
-                "success": True,
-                "operation_id": getattr(response.parsed, "operation_id", None),
-                "message": getattr(response.parsed, "message", "Materialization started"),
-            }
-
-            if options.on_progress:
-                options.on_progress("✅ Graph materialization completed")
-
-            return result
-
-        except Exception as e:
-            logger.error(f"Failed to materialize graph: {e}")
-            return {"success": False, "error": str(e)}
-
-    def upload_and_ingest(
-        self,
-        graph_id: str,
-        table_name: str,
-        file_path: Path,
-        upload_options: Optional[UploadOptions] = None,
-        ingest_options: Optional[IngestOptions] = None,
-    ) -> Dict[str, Any]:
-        """
-        Convenience method to upload a file and immediately ingest it.
-
-        Args:
-            graph_id: The graph ID
-            table_name: Name of the staging table
-            file_path: Path to the Parquet file
-            upload_options: Upload options
-            ingest_options: Ingest options
-
-        Returns:
-            Dictionary with upload and ingest results
-        """
-        # Upload the file
-        upload_result = self.upload_parquet_file(
-            graph_id, table_name, file_path, upload_options
-        )
-
-        if not upload_result.success:
-            return {
-                "success": False,
-                "upload": upload_result,
-                "ingest": None,
-                "error": upload_result.error,
-            }
-
-        # Ingest the table
-        ingest_result = self.ingest_all_tables(graph_id, ingest_options)
-
-        return {
-            "success": upload_result.success and ingest_result.get("success", False),
-            "upload": upload_result,
-            "ingest": ingest_result,
-        }
-
-    def close(self):
-        """Close HTTP client connections"""
-        if self._http_client:
-            self._http_client.close()
{robosystems_client-0.2.16.dist-info → robosystems_client-0.2.18.dist-info}/WHEEL
RENAMED
File without changes

{robosystems_client-0.2.16.dist-info → robosystems_client-0.2.18.dist-info}/licenses/LICENSE
RENAMED
File without changes
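
The deleted robosystems_client/extensions/table_ingest_client.py bundled presigned-URL file uploads, staging-table listing, and graph materialization in a single TableIngestClient; judging from the file list above, 0.2.18 splits that surface across the new file_client.py, table_client.py, and materialization_client.py. For reference, a minimal sketch of how 0.2.16 code drove the removed client, based only on the signatures visible in the deleted file; the base URL, API key, graph ID, table name, and file name are placeholder values:

from pathlib import Path

from robosystems_client.extensions.table_ingest_client import (
    IngestOptions,
    TableIngestClient,
    UploadOptions,
)

# Config shape from TableIngestClient.__init__: "base_url" is required,
# "token" carries the X-API-Key value, "headers" is optional.
client = TableIngestClient(
    {
        "base_url": "https://api.example.com",  # placeholder
        "token": "my-api-key",  # placeholder API key
    }
)

try:
    # upload_and_ingest chains the 3-step upload (presigned URL, S3 PUT,
    # mark "uploaded") with a full graph materialization.
    result = client.upload_and_ingest(
        graph_id="graph-123",  # placeholder
        table_name="transactions",  # placeholder staging table
        file_path=Path("data.parquet"),  # placeholder local file
        upload_options=UploadOptions(on_progress=print),
        ingest_options=IngestOptions(ignore_errors=True, on_progress=print),
    )
    print(result["success"])
finally:
    client.close()  # releases the underlying httpx.Client

Code that imports robosystems_client.extensions.table_ingest_client will raise ImportError after upgrading to 0.2.18 and has to migrate to the new clients.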