robosystems-client 0.2.16__py3-none-any.whl → 0.2.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this version of robosystems-client has been flagged as a potentially problematic release.

@@ -1,463 +0,0 @@
- """Table Ingest Client for RoboSystems API
-
- Simplifies uploading Parquet files to staging tables and ingesting them into graphs.
- """
-
- from dataclasses import dataclass
- from io import BytesIO
- from pathlib import Path
- from typing import Dict, Any, Optional, Callable, List, Union, BinaryIO
- import json
- import logging
- import httpx
-
- from ..api.files.create_file_upload import (
-     sync_detailed as create_file_upload,
- )
- from ..api.files.update_file import (
-     sync_detailed as update_file,
- )
- from ..api.tables.list_tables import (
-     sync_detailed as list_tables,
- )
- from ..api.materialization.materialize_graph import (
-     sync_detailed as materialize_graph,
- )
- from ..models.file_upload_request import FileUploadRequest
- from ..models.file_status_update import FileStatusUpdate
- from ..models.materialize_request import MaterializeRequest
-
- logger = logging.getLogger(__name__)
-
-
- @dataclass
- class UploadOptions:
-     """Options for file upload operations"""
-
-     on_progress: Optional[Callable[[str], None]] = None
-     fix_localstack_url: bool = True  # Auto-fix LocalStack URLs for localhost
-     file_name: Optional[str] = None  # Override file name (useful for buffer uploads)
-
-
- @dataclass
- class IngestOptions:
-     """Options for table ingestion operations"""
-
-     ignore_errors: bool = True
-     rebuild: bool = False
-     on_progress: Optional[Callable[[str], None]] = None
-
-
- @dataclass
- class UploadResult:
-     """Result from file upload operation"""
-
-     file_id: str
-     file_size: int
-     row_count: int
-     table_name: str
-     file_name: str
-     success: bool = True
-     error: Optional[str] = None
-
-
- @dataclass
- class TableInfo:
-     """Information about a staging table"""
-
-     table_name: str
-     row_count: int
-     file_count: int
-     total_size_bytes: int
-
-
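The options above are plain dataclasses, so wiring in progress reporting is just a matter of passing any Callable[[str], None]. A minimal sketch (the import path is illustrative; this diff does not show where the module lived inside the package):

    from robosystems_client.table_ingest import UploadOptions, IngestOptions

    # print accepts a single string, so it works as an on_progress hook
    upload_opts = UploadOptions(on_progress=print, file_name="entities.parquet")
    ingest_opts = IngestOptions(rebuild=True, ignore_errors=False, on_progress=print)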
- class TableIngestClient:
-     """Enhanced table ingest client with simplified upload workflow"""
-
-     def __init__(self, config: Dict[str, Any]):
-         self.config = config
-         self.base_url = config["base_url"]
-         self.headers = config.get("headers", {})
-         self.token = config.get("token")
-         # Create httpx client for S3 uploads
-         self._http_client = httpx.Client(timeout=120.0)
-
-     def upload_parquet_file(
-         self,
-         graph_id: str,
-         table_name: str,
-         file_or_buffer: Union[Path, str, BytesIO, BinaryIO],
-         options: Optional[UploadOptions] = None,
-     ) -> UploadResult:
-         """
-         Upload a Parquet file to a staging table.
-
-         This method handles the complete 3-step upload process:
-         1. Get presigned upload URL
-         2. Upload file to S3
-         3. Mark file as 'uploaded' (backend validates, calculates size/row count)
-
-         Args:
-             graph_id: The graph ID
-             table_name: Name of the staging table
-             file_or_buffer: Path to the Parquet file or BytesIO/BinaryIO buffer
-             options: Upload options
-
-         Returns:
-             UploadResult with upload details (size/row count calculated by backend)
-         """
-         if options is None:
-             options = UploadOptions()
-
-         # Auto-detect if this is a file path or buffer
-         # isinstance against typing.BinaryIO never matches real file objects,
-         # so duck-type instead: anything with a read() method is treated as a buffer
-         is_buffer = hasattr(file_or_buffer, "read")
-
-         # Initialize file_path for type checking
-         file_path: Optional[Path] = None
-
-         if is_buffer:
-             # Handle buffer upload
-             file_name = options.file_name or "data.parquet"
-         else:
-             # Handle file path upload
-             file_path = Path(file_or_buffer)
-             file_name = file_path.name
-             if not file_path.exists():
-                 return UploadResult(
-                     file_id="",
-                     file_size=0,
-                     row_count=0,
-                     table_name=table_name,
-                     file_name=file_name,
-                     success=False,
-                     error=f"File not found: {file_path}",
-                 )
-
-         try:
-             # Import client here to avoid circular imports
-             from ..client import AuthenticatedClient
-
-             # Create authenticated client with X-API-Key
-             # The token is extracted from X-API-Key header in extensions.py
-             if not self.token:
-                 return UploadResult(
-                     file_id="",
-                     file_size=0,
-                     row_count=0,
-                     table_name=table_name,
-                     file_name=file_name,
-                     success=False,
-                     error="No API key provided. Set X-API-Key in headers.",
-                 )
-
-             client = AuthenticatedClient(
-                 base_url=self.base_url,
-                 token=self.token,
-                 prefix="",  # No prefix for X-API-Key
-                 auth_header_name="X-API-Key",  # Use X-API-Key header instead of Authorization
-                 headers=self.headers,
-             )
-
-             # Step 1: Get presigned upload URL
-             if options.on_progress:
-                 options.on_progress(
-                     f"Getting upload URL for {file_name} -> table '{table_name}'..."
-                 )
-
-             upload_request = FileUploadRequest(
-                 file_name=file_name, content_type="application/x-parquet", table_name=table_name
-             )
-
-             kwargs = {
-                 "graph_id": graph_id,
-                 "client": client,
-                 "body": upload_request,
-             }
-
-             response = create_file_upload(**kwargs)
-
-             if not response.parsed:
-                 error_msg = f"Failed to get upload URL (status: {response.status_code})"
-                 if hasattr(response, "content"):
-                     try:
-                         error_detail = json.loads(response.content)
-                         error_msg = f"{error_msg}: {error_detail}"
-                     except (json.JSONDecodeError, ValueError):
-                         error_msg = f"{error_msg}: {response.content[:200]}"
-
-                 return UploadResult(
-                     file_id="",
-                     file_size=0,
-                     row_count=0,
-                     table_name=table_name,
-                     file_name=file_name,
-                     success=False,
-                     error=error_msg,
-                 )
-
-             upload_url = response.parsed.upload_url
-             file_id = response.parsed.file_id
-
-             # Fix LocalStack URL if needed
-             if options.fix_localstack_url and "localstack:4566" in upload_url:
-                 upload_url = upload_url.replace("localstack:4566", "localhost:4566")
-                 logger.debug("Fixed LocalStack URL for localhost access")
-
-             # Step 2: Upload file to S3
-             if options.on_progress:
-                 options.on_progress(f"Uploading {file_name} to S3...")
-
-             # Read file content - handle both paths and buffers
-             if is_buffer:
-                 # Read from buffer
-                 if hasattr(file_or_buffer, "getvalue"):
-                     file_content = file_or_buffer.getvalue()
-                 else:
-                     # BinaryIO or file-like object
-                     file_or_buffer.seek(0)
-                     file_content = file_or_buffer.read()
-             else:
-                 # Read from file path
-                 if file_path is None:
-                     raise ValueError("file_path should not be None when not using buffer")
-                 with open(file_path, "rb") as f:
-                     file_content = f.read()
-
-             s3_response = self._http_client.put(
-                 upload_url,
-                 content=file_content,
-                 headers={"Content-Type": "application/x-parquet"},
-             )
-             s3_response.raise_for_status()
-
-             # Step 3: Mark file as uploaded (backend validates and calculates size/row count)
-             if options.on_progress:
-                 options.on_progress(f"Marking {file_name} as uploaded...")
-
-             status_update = FileStatusUpdate(status="uploaded")
-
-             kwargs = {
-                 "graph_id": graph_id,
-                 "file_id": file_id,
-                 "client": client,
-                 "body": status_update,
-             }
-
-             update_response = update_file(**kwargs)
-
-             if not update_response.parsed:
-                 logger.error(
-                     f"No parsed response from update_file. Status code: {update_response.status_code}"
-                 )
-                 return UploadResult(
-                     file_id=file_id,
-                     file_size=0,
-                     row_count=0,
-                     table_name=table_name,
-                     file_name=file_name,
-                     success=False,
-                     error="Failed to complete file upload",
-                 )
-
-             response_data = update_response.parsed
-
-             if isinstance(response_data, dict):
-                 file_size = response_data.get("file_size_bytes", 0)
-                 row_count = response_data.get("row_count", 0)
-             elif hasattr(response_data, "additional_properties"):
-                 file_size = response_data.additional_properties.get("file_size_bytes", 0)
-                 row_count = response_data.additional_properties.get("row_count", 0)
-             else:
-                 file_size = getattr(response_data, "file_size_bytes", 0)
-                 row_count = getattr(response_data, "row_count", 0)
-
-             if options.on_progress:
-                 options.on_progress(
-                     f"✅ Uploaded {file_name} ({file_size:,} bytes, {row_count:,} rows)"
-                 )
-
-             return UploadResult(
-                 file_id=file_id,
-                 file_size=file_size,
-                 row_count=row_count,
-                 table_name=table_name,
-                 file_name=file_name,
-                 success=True,
-             )
-
-         except Exception as e:
-             logger.error(f"Upload failed for {file_name}: {e}")
-             return UploadResult(
-                 file_id="",
-                 file_size=0,
-                 row_count=0,
-                 table_name=table_name,
-                 file_name=file_name,
-                 success=False,
-                 error=str(e),
-             )
-
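The method above hides the three-step flow (presigned URL, S3 PUT, status update) behind a single call. A usage sketch, with an illustrative endpoint, API key, graph ID, and table name (none of these values come from the diff):

    from io import BytesIO
    from pathlib import Path

    client = TableIngestClient({
        "base_url": "https://api.example.com",  # illustrative endpoint
        "token": "my-api-key",                  # sent as X-API-Key
    })

    # Upload from a file on disk
    result = client.upload_parquet_file("graph_123", "Entity", Path("entities.parquet"))

    # Upload from an in-memory buffer; without file_name it is recorded as "data.parquet"
    buf = BytesIO(Path("entities.parquet").read_bytes())
    result = client.upload_parquet_file(
        "graph_123",
        "Entity",
        buf,
        UploadOptions(file_name="entities.parquet", on_progress=print),
    )
    if not result.success:
        raise RuntimeError(result.error)

Note that failures are returned as UploadResult(success=False, error=...) rather than raised, so callers must check the success flag.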
-     def list_staging_tables(self, graph_id: str) -> List[TableInfo]:
-         """
-         List all staging tables in a graph.
-
-         Args:
-             graph_id: The graph ID
-
-         Returns:
-             List of TableInfo objects
-         """
-         try:
-             from ..client import AuthenticatedClient
-
-             if not self.token:
-                 logger.error("No API key provided")
-                 return []
-
-             client = AuthenticatedClient(
-                 base_url=self.base_url,
-                 token=self.token,
-                 prefix="",
-                 auth_header_name="X-API-Key",
-                 headers=self.headers,
-             )
-
-             kwargs = {"graph_id": graph_id, "client": client}
-
-             response = list_tables(**kwargs)
-
-             if not response.parsed:
-                 logger.error("Failed to list tables")
-                 return []
-
-             tables = []
-             for table_data in response.parsed.tables:
-                 tables.append(
-                     TableInfo(
-                         table_name=table_data.table_name,
-                         row_count=table_data.row_count,
-                         file_count=table_data.file_count,
-                         total_size_bytes=table_data.total_size_bytes,
-                     )
-                 )
-
-             return tables
-
-         except Exception as e:
-             logger.error(f"Failed to list tables: {e}")
-             return []
-
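A quick way to verify what reached staging, under the same assumed setup as the sketch above:

    for t in client.list_staging_tables("graph_123"):
        print(
            f"{t.table_name}: {t.row_count:,} rows, "
            f"{t.file_count} file(s), {t.total_size_bytes:,} bytes"
        )

One design consequence worth noting: failures are logged and collapsed into an empty list, so callers cannot distinguish an empty graph from a failed request.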
-     def ingest_all_tables(
-         self, graph_id: str, options: Optional[IngestOptions] = None
-     ) -> Dict[str, Any]:
-         """
-         Materialize the graph from all staging tables.
-
-         This rebuilds the complete graph database from the current state of DuckDB staging tables.
-
-         Args:
-             graph_id: The graph ID
-             options: Ingest options
-
-         Returns:
-             Dictionary with materialization results
-         """
-         if options is None:
-             options = IngestOptions()
-
-         try:
-             from ..client import AuthenticatedClient
-
-             if not self.token:
-                 return {"success": False, "error": "No API key provided"}
-
-             client = AuthenticatedClient(
-                 base_url=self.base_url,
-                 token=self.token,
-                 prefix="",
-                 auth_header_name="X-API-Key",
-                 headers=self.headers,
-             )
-
-             if options.on_progress:
-                 options.on_progress("Starting table materialization...")
-
-             materialize_request = MaterializeRequest(
-                 ignore_errors=options.ignore_errors, rebuild=options.rebuild, force=True
-             )
-
-             kwargs = {
-                 "graph_id": graph_id,
-                 "client": client,
-                 "body": materialize_request,
-             }
-
-             response = materialize_graph(**kwargs)
-
-             if not response.parsed:
-                 return {"success": False, "error": "Failed to materialize graph"}
-
-             result = {
-                 "success": True,
-                 "operation_id": getattr(response.parsed, "operation_id", None),
-                 "message": getattr(response.parsed, "message", "Materialization started"),
-             }
-
-             if options.on_progress:
-                 options.on_progress("✅ Graph materialization started")
-
-             return result
-
-         except Exception as e:
-             logger.error(f"Failed to materialize graph: {e}")
-             return {"success": False, "error": str(e)}
-
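Because the server responds with an operation_id, the returned dictionary signals that materialization was accepted, not that it finished. A sketch with illustrative values, continuing the setup above:

    outcome = client.ingest_all_tables(
        "graph_123",
        IngestOptions(rebuild=True, ignore_errors=False, on_progress=print),
    )
    if outcome["success"]:
        print("materialization operation:", outcome["operation_id"])
    else:
        print("materialization failed:", outcome["error"])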
-     def upload_and_ingest(
-         self,
-         graph_id: str,
-         table_name: str,
-         file_path: Path,
-         upload_options: Optional[UploadOptions] = None,
-         ingest_options: Optional[IngestOptions] = None,
-     ) -> Dict[str, Any]:
-         """
-         Convenience method to upload a file and immediately ingest it.
-
-         Args:
-             graph_id: The graph ID
-             table_name: Name of the staging table
-             file_path: Path to the Parquet file
-             upload_options: Upload options
-             ingest_options: Ingest options
-
-         Returns:
-             Dictionary with upload and ingest results
-         """
-         # Upload the file
-         upload_result = self.upload_parquet_file(
-             graph_id, table_name, file_path, upload_options
-         )
-
-         if not upload_result.success:
-             return {
-                 "success": False,
-                 "upload": upload_result,
-                 "ingest": None,
-                 "error": upload_result.error,
-             }
-
-         # Ingest the table
-         ingest_result = self.ingest_all_tables(graph_id, ingest_options)
-
-         return {
-             "success": upload_result.success and ingest_result.get("success", False),
-             "upload": upload_result,
-             "ingest": ingest_result,
-         }
-
-     def close(self):
-         """Close HTTP client connections"""
-         if self._http_client:
-             self._http_client.close()
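Taken together, the class supports a compact upload-then-materialize flow. Since it owns an httpx.Client, close() should bracket its use. An end-to-end try/finally sketch with the same illustrative identifiers as above:

    from pathlib import Path

    client = TableIngestClient({"base_url": "https://api.example.com", "token": "my-api-key"})
    try:
        outcome = client.upload_and_ingest("graph_123", "Entity", Path("entities.parquet"))
        if not outcome["success"]:
            raise RuntimeError(outcome["error"])
    finally:
        client.close()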