robosystems-client 0.2.16__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

@@ -28,12 +28,22 @@ from .operation_client import (
    OperationProgress,
    OperationResult,
  )
- from .table_ingest_client import (
-   TableIngestClient,
-   UploadOptions,
-   IngestOptions,
-   UploadResult,
+ from .file_client import (
+   FileClient,
+   FileUploadOptions,
+   FileUploadResult,
+   FileInfo,
+ )
+ from .materialization_client import (
+   MaterializationClient,
+   MaterializationOptions,
+   MaterializationResult,
+   MaterializationStatus,
+ )
+ from .table_client import (
+   TableClient,
    TableInfo,
+   QueryResult as TableQueryResult,
  )
  from .graph_client import (
    GraphClient,
@@ -177,12 +187,20 @@ __all__ = [
    "OperationStatus",
    "OperationProgress",
    "OperationResult",
-   # Table Ingest Client
-   "TableIngestClient",
-   "UploadOptions",
-   "IngestOptions",
-   "UploadResult",
+   # File Client
+   "FileClient",
+   "FileUploadOptions",
+   "FileUploadResult",
+   "FileInfo",
+   # Materialization Client
+   "MaterializationClient",
+   "MaterializationOptions",
+   "MaterializationResult",
+   "MaterializationStatus",
+   # Table Client
+   "TableClient",
    "TableInfo",
+   "TableQueryResult",
    # Graph Client
    "GraphClient",
    "GraphMetadata",
@@ -9,7 +9,9 @@ from typing import Dict, Any, Optional, Callable
  from .query_client import QueryClient
  from .agent_client import AgentClient
  from .operation_client import OperationClient
- from .table_ingest_client import TableIngestClient
+ from .file_client import FileClient
+ from .materialization_client import MaterializationClient
+ from .table_client import TableClient
  from .graph_client import GraphClient
  from .sse_client import SSEClient
@@ -61,7 +63,9 @@ class RoboSystemsExtensions:
      self.query = QueryClient(self.config)
      self.agent = AgentClient(self.config)
      self.operations = OperationClient(self.config)
-     self.tables = TableIngestClient(self.config)
+     self.files = FileClient(self.config)
+     self.materialization = MaterializationClient(self.config)
+     self.tables = TableClient(self.config)
      self.graphs = GraphClient(self.config)

    def monitor_operation(
@@ -92,7 +96,12 @@ class RoboSystemsExtensions:
      self.query.close()
      self.agent.close()
      self.operations.close_all()
-     self.tables.close()
+     if hasattr(self.files, "close"):
+       self.files.close()
+     if hasattr(self.materialization, "close"):
+       self.materialization.close()
+     if hasattr(self.tables, "close"):
+       self.tables.close()
      self.graphs.close()

    # Convenience methods that delegate to the appropriate clients
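
The net effect of the extensions changes above: the single `tables` attribute (formerly a TableIngestClient) splits into `files`, `materialization`, and a slimmer `tables`. A minimal sketch of the new surface, assuming the extensions object is constructed the same way as in 0.2.16 and that `ext` is such an instance:

# Sketch only: construction details are assumed unchanged from 0.2.16.
from robosystems_client.extensions import RoboSystemsExtensions

ext = RoboSystemsExtensions(config)  # `config` shaped as in prior releases (assumption)

ext.files            # FileClient: upload/list/get/delete files
ext.materialization  # MaterializationClient: rebuild graph from DuckDB staging
ext.tables           # TableClient: list and SQL-query staging tables (was TableIngestClient)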
@@ -0,0 +1,380 @@
+ """File Client for RoboSystems API
+
+ Manages file operations as first-class resources with multi-layer status tracking.
+ Files are independent entities with their own lifecycle (S3 → DuckDB → Graph).
+ """
+
+ from dataclasses import dataclass
+ from io import BytesIO
+ from pathlib import Path
+ from typing import Dict, Any, Optional, Callable, Union, BinaryIO
+ import logging
+ import httpx
+
+ from ..api.files.create_file_upload import (
+   sync_detailed as create_file_upload,
+ )
+ from ..api.files.update_file import (
+   sync_detailed as update_file,
+ )
+ from ..api.files.list_files import (
+   sync_detailed as list_files,
+ )
+ from ..api.files.get_file import (
+   sync_detailed as get_file,
+ )
+ from ..api.files.delete_file import (
+   sync_detailed as delete_file,
+ )
+ from ..models.file_upload_request import FileUploadRequest
+ from ..models.file_status_update import FileStatusUpdate
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class FileUploadOptions:
+   """Options for file upload operations"""
+
+   on_progress: Optional[Callable[[str], None]] = None
+   fix_localstack_url: bool = True
+   ingest_to_graph: bool = False
+
+
+ @dataclass
+ class FileUploadResult:
+   """Result from file upload operation"""
+
+   file_id: str
+   file_size: int
+   row_count: int
+   table_name: str
+   file_name: str
+   success: bool = True
+   error: Optional[str] = None
+
+
+ @dataclass
+ class FileInfo:
+   """Information about a file"""
+
+   file_id: str
+   file_name: str
+   file_format: str
+   size_bytes: int
+   row_count: Optional[int]
+   upload_status: str
+   table_name: str
+   created_at: Optional[str]
+   uploaded_at: Optional[str]
+   layers: Optional[Dict[str, Any]] = None
+
+
+ class FileClient:
+   """Client for managing files as first-class resources"""
+
+   def __init__(self, config: Dict[str, Any]):
+     self.config = config
+     self.base_url = config["base_url"]
+     self.headers = config.get("headers", {})
+     self.token = config.get("token")
+     self._http_client = httpx.Client(timeout=120.0)
+
+   def upload(
+     self,
+     graph_id: str,
+     table_name: str,
+     file_or_buffer: Union[Path, str, BytesIO, BinaryIO],
+     options: Optional[FileUploadOptions] = None,
+   ) -> FileUploadResult:
+     """
+     Upload a file to a table.
+
+     This handles the complete 3-step upload process:
+     1. Get presigned upload URL
+     2. Upload file to S3
+     3. Mark file as 'uploaded' (triggers DuckDB staging)
+
+     Args:
+       graph_id: Graph database identifier
+       table_name: Table to associate file with
+       file_or_buffer: File path, Path object, BytesIO, or file-like object
+       options: Upload options (progress callback, LocalStack URL fix, auto-ingest)
+
+     Returns:
+       FileUploadResult with file metadata and status
+     """
+     options = options or FileUploadOptions()
+
+     try:
+       # Determine file name and read content
+       if isinstance(file_or_buffer, (str, Path)):
+         file_path = Path(file_or_buffer)
+         file_name = file_path.name
+         with open(file_path, "rb") as f:
+           file_content = f.read()
+       elif isinstance(file_or_buffer, BytesIO):
+         file_name = "data.parquet"
+         file_content = file_or_buffer.getvalue()
+       elif hasattr(file_or_buffer, "read"):
+         file_name = getattr(file_or_buffer, "name", "data.parquet")
+         file_content = file_or_buffer.read()
+       else:
+         raise ValueError(f"Unsupported file type: {type(file_or_buffer)}")
+
+       # Step 1: Get presigned upload URL
+       if options.on_progress:
+         options.on_progress(
+           f"Getting upload URL for {file_name} → table '{table_name}'..."
+         )
+
+       upload_request = FileUploadRequest(
+         file_name=file_name,
+         content_type="application/x-parquet",
+         table_name=table_name,
+       )
+
+       kwargs = {
+         "graph_id": graph_id,
+         "client": self.config.get("client"),
+         "body": upload_request,
+       }
+
+       response = create_file_upload(**kwargs)
+
+       if response.status_code != 200 or not response.parsed:
+         error_msg = f"Failed to get upload URL: {response.status_code}"
+         return FileUploadResult(
+           file_id="",
+           file_size=0,
+           row_count=0,
+           table_name=table_name,
+           file_name=file_name,
+           success=False,
+           error=error_msg,
+         )
+
+       upload_data = response.parsed
+       upload_url = upload_data.upload_url
+       file_id = upload_data.file_id
+
+       # Fix LocalStack URL if needed
+       if options.fix_localstack_url and "localstack:4566" in upload_url:
+         upload_url = upload_url.replace("localstack:4566", "localhost:4566")
+
+       # Step 2: Upload file to S3
+       if options.on_progress:
+         options.on_progress(f"Uploading {file_name} to S3...")
+
+       s3_response = self._http_client.put(
+         upload_url,
+         content=file_content,
+         headers={"Content-Type": "application/x-parquet"},
+       )
+
+       if s3_response.status_code not in [200, 204]:
+         return FileUploadResult(
+           file_id=file_id,
+           file_size=len(file_content),
+           row_count=0,
+           table_name=table_name,
+           file_name=file_name,
+           success=False,
+           error=f"S3 upload failed: {s3_response.status_code}",
+         )
+
+       # Step 3: Mark file as uploaded
+       if options.on_progress:
+         options.on_progress(f"Marking {file_name} as uploaded...")
+
+       status_update = FileStatusUpdate(
+         status="uploaded",
+         ingest_to_graph=options.ingest_to_graph,
+       )
+
+       update_kwargs = {
+         "graph_id": graph_id,
+         "file_id": file_id,
+         "client": self.config.get("client"),
+         "body": status_update,
+       }
+
+       update_response = update_file(**update_kwargs)
+
+       if update_response.status_code != 200 or not update_response.parsed:
+         return FileUploadResult(
+           file_id=file_id,
+           file_size=len(file_content),
+           row_count=0,
+           table_name=table_name,
+           file_name=file_name,
+           success=False,
+           error="Failed to complete file upload",
+         )
+
+       # Extract metadata from response
+       response_data = update_response.parsed
+       actual_file_size = getattr(response_data, "file_size_bytes", len(file_content))
+       actual_row_count = getattr(response_data, "row_count", 0)
+
+       if options.on_progress:
+         options.on_progress(
+           f"✅ Uploaded {file_name} ({actual_file_size:,} bytes, {actual_row_count:,} rows)"
+         )
+
+       return FileUploadResult(
+         file_id=file_id,
+         file_size=actual_file_size,
+         row_count=actual_row_count,
+         table_name=table_name,
+         file_name=file_name,
+         success=True,
+       )
+
+     except Exception as e:
+       logger.error(f"File upload failed: {e}")
+       return FileUploadResult(
+         file_id="",
+         file_size=0,
+         row_count=0,
+         table_name=table_name,
+         file_name=getattr(file_or_buffer, "name", "unknown"),
+         success=False,
+         error=str(e),
+       )
+
+   def list(
+     self,
+     graph_id: str,
+     table_name: Optional[str] = None,
+     status: Optional[str] = None,
+   ) -> list[FileInfo]:
+     """
+     List files in a graph with optional filtering.
+
+     Args:
+       graph_id: Graph database identifier
+       table_name: Optional table name filter
+       status: Optional upload status filter (uploaded, pending, etc.)
+
+     Returns:
+       List of FileInfo objects
+     """
+     try:
+       kwargs = {
+         "graph_id": graph_id,
+         "client": self.config.get("client"),
+       }
+
+       if table_name:
+         kwargs["table_name"] = table_name
+       if status:
+         kwargs["status"] = status
+
+       response = list_files(**kwargs)
+
+       if response.status_code != 200 or not response.parsed:
+         logger.error(f"Failed to list files: {response.status_code}")
+         return []
+
+       files_data = response.parsed
+       files = getattr(files_data, "files", [])
+
+       return [
+         FileInfo(
+           file_id=f.file_id,
+           file_name=f.file_name,
+           file_format=f.file_format,
+           size_bytes=f.size_bytes or 0,
+           row_count=f.row_count,
+           upload_status=f.upload_status,
+           table_name=getattr(f, "table_name", ""),
+           created_at=f.created_at,
+           uploaded_at=f.uploaded_at,
+         )
+         for f in files
+       ]
+
+     except Exception as e:
+       logger.error(f"Failed to list files: {e}")
+       return []
+
+   def get(self, graph_id: str, file_id: str) -> Optional[FileInfo]:
+     """
+     Get detailed information about a specific file.
+
+     Args:
+       graph_id: Graph database identifier
+       file_id: File ID
+
+     Returns:
+       FileInfo with multi-layer status tracking, or None if not found
+     """
+     try:
+       kwargs = {
+         "graph_id": graph_id,
+         "file_id": file_id,
+         "client": self.config.get("client"),
+       }
+
+       response = get_file(**kwargs)
+
+       if response.status_code != 200 or not response.parsed:
+         logger.error(f"Failed to get file {file_id}: {response.status_code}")
+         return None
+
+       file_data = response.parsed
+
+       return FileInfo(
+         file_id=file_data.file_id,
+         file_name=file_data.file_name,
+         file_format=file_data.file_format,
+         size_bytes=file_data.size_bytes or 0,
+         row_count=file_data.row_count,
+         upload_status=file_data.upload_status,
+         table_name=file_data.table_name or "",
+         created_at=file_data.created_at,
+         uploaded_at=file_data.uploaded_at,
+         layers=getattr(file_data, "layers", None),
+       )
+
+     except Exception as e:
+       logger.error(f"Failed to get file {file_id}: {e}")
+       return None
+
+   def delete(self, graph_id: str, file_id: str, cascade: bool = False) -> bool:
+     """
+     Delete a file from all layers.
+
+     Args:
+       graph_id: Graph database identifier
+       file_id: File ID to delete
+       cascade: If True, delete from all layers including DuckDB and graph
+
+     Returns:
+       True if deletion succeeded, False otherwise
+     """
+     try:
+       kwargs = {
+         "graph_id": graph_id,
+         "file_id": file_id,
+         "client": self.config.get("client"),
+         "cascade": cascade,
+       }
+
+       response = delete_file(**kwargs)
+
+       if response.status_code not in [200, 204]:
+         logger.error(f"Failed to delete file {file_id}: {response.status_code}")
+         return False
+
+       return True
+
+     except Exception as e:
+       logger.error(f"Failed to delete file {file_id}: {e}")
+       return False
+
+   def __del__(self):
+     """Cleanup HTTP client on deletion"""
+     if hasattr(self, "_http_client"):
+       self._http_client.close()
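
Taken together, `upload()` hides the presign → S3 PUT → status-update sequence behind a single call. A usage sketch against the class above, assuming `ext` from the earlier sketch and placeholder graph/table names:

# Sketch: "my-graph", "Entity", and "data.parquet" are placeholders.
from robosystems_client.extensions.file_client import FileUploadOptions

result = ext.files.upload(
  graph_id="my-graph",
  table_name="Entity",
  file_or_buffer="data.parquet",  # path, Path, BytesIO, or file-like object
  options=FileUploadOptions(on_progress=print, ingest_to_graph=True),
)
if result.success:
  print(result.file_id, result.row_count)
else:
  print("upload failed:", result.error)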
@@ -0,0 +1,211 @@
+ """Materialization Client for RoboSystems API
+
+ Manages graph materialization from DuckDB staging tables.
+ Treats the graph database as a materialized view of the mutable DuckDB data lake.
+ """
+
+ from dataclasses import dataclass
+ from typing import Dict, Any, Optional, Callable
+ import logging
+
+ from ..api.materialization.materialize_graph import (
+   sync_detailed as materialize_graph,
+ )
+ from ..api.materialization.get_materialization_status import (
+   sync_detailed as get_materialization_status,
+ )
+ from ..models.materialize_request import MaterializeRequest
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class MaterializationOptions:
+   """Options for graph materialization operations"""
+
+   ignore_errors: bool = True
+   rebuild: bool = False
+   force: bool = False
+   on_progress: Optional[Callable[[str], None]] = None
+
+
+ @dataclass
+ class MaterializationResult:
+   """Result from materialization operation"""
+
+   status: str
+   was_stale: bool
+   stale_reason: Optional[str]
+   tables_materialized: list[str]
+   total_rows: int
+   execution_time_ms: float
+   message: str
+   success: bool = True
+   error: Optional[str] = None
+
+
+ @dataclass
+ class MaterializationStatus:
+   """Status information about graph materialization"""
+
+   graph_id: str
+   is_stale: bool
+   stale_reason: Optional[str]
+   stale_since: Optional[str]
+   last_materialized_at: Optional[str]
+   materialization_count: int
+   hours_since_materialization: Optional[float]
+   message: str
+
+
+ class MaterializationClient:
+   """Client for managing graph materialization operations"""
+
+   def __init__(self, config: Dict[str, Any]):
+     self.config = config
+     self.base_url = config["base_url"]
+     self.headers = config.get("headers", {})
+     self.token = config.get("token")
+
+   def materialize(
+     self,
+     graph_id: str,
+     options: Optional[MaterializationOptions] = None,
+   ) -> MaterializationResult:
+     """
+     Materialize graph from DuckDB staging tables.
+
+     Rebuilds the complete graph database from the current state of DuckDB
+     staging tables. Automatically discovers all tables, materializes them in
+     the correct order (nodes before relationships), and clears the staleness flag.
+
+     Args:
+       graph_id: Graph database identifier
+       options: Materialization options (ignore_errors, rebuild, force)
+
+     Returns:
+       MaterializationResult with detailed execution information
+
+     When to use:
+     - After batch uploads (files uploaded with ingest_to_graph=false)
+     - After cascade file deletions (graph marked stale)
+     - Periodic full refresh to ensure consistency
+     - Recovery from partial materialization failures
+     """
+     options = options or MaterializationOptions()
+
+     try:
+       if options.on_progress:
+         options.on_progress("Starting graph materialization...")
+
+       request = MaterializeRequest(
+         ignore_errors=options.ignore_errors,
+         rebuild=options.rebuild,
+         force=options.force,
+       )
+
+       kwargs = {
+         "graph_id": graph_id,
+         "client": self.config.get("client"),
+         "body": request,
+       }
+
+       response = materialize_graph(**kwargs)
+
+       if response.status_code != 200 or not response.parsed:
+         error_msg = f"Materialization failed: {response.status_code}"
+         if hasattr(response, "content"):
+           try:
+             import json
+
+             error_data = json.loads(response.content)
+             error_msg = error_data.get("detail", error_msg)
+           except Exception:
+             pass
+
+         return MaterializationResult(
+           status="failed",
+           was_stale=False,
+           stale_reason=None,
+           tables_materialized=[],
+           total_rows=0,
+           execution_time_ms=0,
+           message=error_msg,
+           success=False,
+           error=error_msg,
+         )
+
+       result_data = response.parsed
+
+       if options.on_progress:
+         options.on_progress(
+           f"✅ Materialization complete: {len(result_data.tables_materialized)} tables, "
+           f"{result_data.total_rows:,} rows in {result_data.execution_time_ms:.2f}ms"
+         )
+
+       return MaterializationResult(
+         status=result_data.status,
+         was_stale=result_data.was_stale,
+         stale_reason=result_data.stale_reason,
+         tables_materialized=result_data.tables_materialized,
+         total_rows=result_data.total_rows,
+         execution_time_ms=result_data.execution_time_ms,
+         message=result_data.message,
+         success=True,
+       )
+
+     except Exception as e:
+       logger.error(f"Materialization failed: {e}")
+       return MaterializationResult(
+         status="failed",
+         was_stale=False,
+         stale_reason=None,
+         tables_materialized=[],
+         total_rows=0,
+         execution_time_ms=0,
+         message=str(e),
+         success=False,
+         error=str(e),
+       )
+
+   def status(self, graph_id: str) -> Optional[MaterializationStatus]:
+     """
+     Get current materialization status for the graph.
+
+     Shows whether the graph is stale (DuckDB has changes not yet in graph database),
+     when it was last materialized, and how long since last materialization.
+
+     Args:
+       graph_id: Graph database identifier
+
+     Returns:
+       MaterializationStatus with staleness and timing information
+     """
+     try:
+       kwargs = {
+         "graph_id": graph_id,
+         "client": self.config.get("client"),
+       }
+
+       response = get_materialization_status(**kwargs)
+
+       if response.status_code != 200 or not response.parsed:
+         logger.error(f"Failed to get materialization status: {response.status_code}")
+         return None
+
+       status_data = response.parsed
+
+       return MaterializationStatus(
+         graph_id=status_data.graph_id,
+         is_stale=status_data.is_stale,
+         stale_reason=status_data.stale_reason,
+         stale_since=status_data.stale_since,
+         last_materialized_at=status_data.last_materialized_at,
+         materialization_count=status_data.materialization_count,
+         hours_since_materialization=status_data.hours_since_materialization,
+         message=status_data.message,
+       )
+
+     except Exception as e:
+       logger.error(f"Failed to get materialization status: {e}")
+       return None
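
Since per-file ingestion is opt-in (`ingest_to_graph` on upload), the batch pattern is to upload everything first and materialize once. A sketch, again assuming `ext` and a placeholder graph ID:

# Sketch: rebuild only when the graph is actually stale.
status = ext.materialization.status("my-graph")
if status and status.is_stale:
  result = ext.materialization.materialize("my-graph")
  if result.success:
    print(len(result.tables_materialized), "tables,", result.total_rows, "rows")
  else:
    print("materialization failed:", result.error)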
@@ -0,0 +1,161 @@
+ """Table Client for RoboSystems API
+
+ Manages DuckDB staging table operations.
+ Tables provide SQL-queryable staging layer before graph materialization.
+ """
+
+ from dataclasses import dataclass
+ from typing import Dict, Any, Optional
+ import logging
+
+ from ..api.tables.list_tables import (
+   sync_detailed as list_tables,
+ )
+ from ..api.tables.query_tables import (
+   sync_detailed as query_tables,
+ )
+ from ..models.table_query_request import TableQueryRequest
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class TableInfo:
+   """Information about a DuckDB staging table"""
+
+   table_name: str
+   table_type: str
+   row_count: int
+   file_count: int
+   total_size_bytes: int
+
+
+ @dataclass
+ class QueryResult:
+   """Result from SQL query execution"""
+
+   columns: list[str]
+   rows: list[list[Any]]
+   row_count: int
+   execution_time_ms: float
+   success: bool = True
+   error: Optional[str] = None
+
+
+ class TableClient:
+   """Client for managing DuckDB staging tables"""
+
+   def __init__(self, config: Dict[str, Any]):
+     self.config = config
+     self.base_url = config["base_url"]
+     self.headers = config.get("headers", {})
+     self.token = config.get("token")
+
+   def list(self, graph_id: str) -> list[TableInfo]:
+     """
+     List all DuckDB staging tables in a graph.
+
+     Args:
+       graph_id: Graph database identifier
+
+     Returns:
+       List of TableInfo objects with metadata
+     """
+     try:
+       kwargs = {
+         "graph_id": graph_id,
+         "client": self.config.get("client"),
+       }
+
+       response = list_tables(**kwargs)
+
+       if response.status_code != 200 or not response.parsed:
+         logger.error(f"Failed to list tables: {response.status_code}")
+         return []
+
+       table_data = response.parsed
+       tables = getattr(table_data, "tables", [])
+
+       return [
+         TableInfo(
+           table_name=t.table_name,
+           table_type=t.table_type,
+           row_count=t.row_count,
+           file_count=t.file_count or 0,
+           total_size_bytes=t.total_size_bytes or 0,
+         )
+         for t in tables
+       ]
+
+     except Exception as e:
+       logger.error(f"Failed to list tables: {e}")
+       return []
+
+   def query(
+     self, graph_id: str, sql_query: str, limit: Optional[int] = None
+   ) -> QueryResult:
+     """
+     Execute SQL query against DuckDB staging tables.
+
+     Args:
+       graph_id: Graph database identifier
+       sql_query: SQL query to execute
+       limit: Optional row limit
+
+     Returns:
+       QueryResult with columns and rows
+
+     Example:
+       >>> result = client.tables.query(
+       ...   graph_id,
+       ...   "SELECT * FROM Entity WHERE entity_type = 'CORPORATION'"
+       ... )
+       >>> for row in result.rows:
+       ...   print(row)
+     """
+     try:
+       final_query = sql_query
+       if limit is not None:
+         final_query = f"{sql_query.rstrip(';')} LIMIT {limit}"
+
+       request = TableQueryRequest(sql=final_query)
+
+       kwargs = {
+         "graph_id": graph_id,
+         "client": self.config.get("client"),
+         "body": request,
+       }
+
+       response = query_tables(**kwargs)
+
+       if response.status_code != 200 or not response.parsed:
+         error_msg = f"Query failed: {response.status_code}"
+         return QueryResult(
+           columns=[],
+           rows=[],
+           row_count=0,
+           execution_time_ms=0,
+           success=False,
+           error=error_msg,
+         )
+
+       result_data = response.parsed
+
+       return QueryResult(
+         columns=result_data.columns,
+         rows=result_data.rows,
+         row_count=len(result_data.rows),
+         execution_time_ms=getattr(result_data, "execution_time_ms", 0),
+         success=True,
+       )
+
+     except Exception as e:
+       logger.error(f"Query failed: {e}")
+       return QueryResult(
+         columns=[],
+         rows=[],
+         row_count=0,
+         execution_time_ms=0,
+         success=False,
+         error=str(e),
+       )
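
Because the staging layer is plain SQL over DuckDB, tables can be inspected before materialization. A sketch combining `list()` and `query()`, with `ext`, the graph ID, and the `Entity` table as assumptions:

# Sketch: enumerate staging tables, then run an ad-hoc count.
for t in ext.tables.list("my-graph"):
  print(t.table_name, t.row_count, t.file_count)

res = ext.tables.query("my-graph", "SELECT COUNT(*) AS n FROM Entity", limit=1)
if res.success:
  print(dict(zip(res.columns, res.rows[0])))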
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: robosystems-client
- Version: 0.2.16
+ Version: 0.2.17
  Summary: Python Client for RoboSystems financial graph database API
  Author: RFS LLC
  License: MIT
@@ -142,18 +142,20 @@ robosystems_client/api/views/__init__.py,sha256=5vd9uJWAjRqa9xzxzYkLD1yoZ12Ld_bA
  robosystems_client/api/views/create_view.py,sha256=aBjk0FsAj6A3K2iqjE7DcTLKjDRsxegFbs5RC0hmbqA,6316
  robosystems_client/api/views/save_view.py,sha256=T7PUUNvp0JIeOYWG2n8Nm4Y9hhEWV7i-Gl2Vl8Oz4Ls,9224
  robosystems_client/extensions/README.md,sha256=qfHFjdgA_J-zNXziNZE6M1MKJiwVkocBi01w_HhvzEk,16136
- robosystems_client/extensions/__init__.py,sha256=FNJ0IP9ZwiPOXzDS7Sc8PAqLXxyFgje41_S5MVivDAs,6687
+ robosystems_client/extensions/__init__.py,sha256=eTuJQGygQTOWC51YVhJOWUWFUMLcPo7MpZ0H3GaxoR0,7076
  robosystems_client/extensions/agent_client.py,sha256=Db2C4hrakVsf6ScnBcNk6rte3Kwn4cQBEHsR_joWMTs,17750
  robosystems_client/extensions/auth_integration.py,sha256=ABOJ8aVjfHehNGNzim1iR9-Cdh7Mr22ce-WgWWeqJt0,6535
  robosystems_client/extensions/dataframe_utils.py,sha256=gK1bgkVqBF0TvWVdGQvqWrt-ur_Rw11j8uNtMoulLWE,12312
  robosystems_client/extensions/element_mapping_client.py,sha256=yuh0QPQBPM33E7r6QWWDiKm3T4TfCdbn2kvO3Jlw4Cs,18516
- robosystems_client/extensions/extensions.py,sha256=QkKIc6cU7uJ5unvH5bdrvq8RuAraqGHh7eY7wpwMVy8,6360
+ robosystems_client/extensions/extensions.py,sha256=7vsD3QeIKbwhC1UqNskFjsfKkg_ZO3PPDnc6TxV3PoA,6722
+ robosystems_client/extensions/file_client.py,sha256=WyAp0uOGb_wVI_MS8tqg1FjrtNH5o88u4BdR1QJ3Qz0,10586
  robosystems_client/extensions/graph_client.py,sha256=OBi0xj0SLIRKLeSu_DiGt2ZakCmhggvNrMP3jdRfEgQ,10326
+ robosystems_client/extensions/materialization_client.py,sha256=xKrLlNt8jR6lwhv5OW8fg7fRQL100zXcZeZ23s7Oih0,6181
  robosystems_client/extensions/operation_client.py,sha256=B1qju-wWQrnrnVJixKGgsA_KEInviwJwdlJxzm_i7P0,13359
  robosystems_client/extensions/query_client.py,sha256=cX3e8EBoTeg4Lwm6edJYRULM2UmGpfqNX3f48S8TQbE,19430
  robosystems_client/extensions/sse_client.py,sha256=XvQIq3JQ0Yiax11E7cwclhupShYOpEMURM2cYQodiz8,15058
  robosystems_client/extensions/subgraph_workspace_client.py,sha256=Ioc7FNJEKaD_kAJBeymwtFlVI-U9t47RouD5ibUHv4g,24036
- robosystems_client/extensions/table_ingest_client.py,sha256=1i1trTGjO35S7G9zefCmS1Aqqzt-IK7lJ7pIPwGL3y8,13022
+ robosystems_client/extensions/table_client.py,sha256=YXddCxHUPGKd89he2koYT2KR7pwZF4WBcbn0innLjuY,3883
  robosystems_client/extensions/token_utils.py,sha256=qCK_s1vBzRnSYwtgncPZRLJVIw3WXmzqNTWjdEEpdgs,10899
  robosystems_client/extensions/utils.py,sha256=vhmUnEsq-UEAMgNhmkqlbJg4oJj096QPiHALEHJ-y4A,16207
  robosystems_client/extensions/view_builder_client.py,sha256=E1LSiDHAvPf2IhifGOliOAwk5vJyu5PWAnr8ZnyulZM,18590
@@ -412,7 +414,7 @@ robosystems_client/models/view_axis_config_member_labels_type_0.py,sha256=kkzpHx
  robosystems_client/models/view_config.py,sha256=HQnqYjLMXRhjZLOc5ypwILriMFKuvPzu0hPQi2vyNoM,3795
  robosystems_client/models/view_source.py,sha256=h66cASj-P_-qOptKv26uAIe9PtIewU2nTs42Ls-lFFk,4098
  robosystems_client/models/view_source_type.py,sha256=KpgczHUeOinV01jdLvytZ2URKwcsRcp1doPx2D3USyw,169
- robosystems_client-0.2.16.dist-info/METADATA,sha256=ZMmCsQ8SU9K2uJ8YDSVmEFe5HMduIl-zar5xq8jYgmU,3904
- robosystems_client-0.2.16.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- robosystems_client-0.2.16.dist-info/licenses/LICENSE,sha256=LjFqQPU4eQh7jAQ04SmE9eC0j74HCdXvzbo0hjW4mWo,1063
- robosystems_client-0.2.16.dist-info/RECORD,,
+ robosystems_client-0.2.17.dist-info/METADATA,sha256=jrEwiwDPG0E1MImeUOVhTJcE1rca6-k2t2THn1L4By4,3904
+ robosystems_client-0.2.17.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ robosystems_client-0.2.17.dist-info/licenses/LICENSE,sha256=LjFqQPU4eQh7jAQ04SmE9eC0j74HCdXvzbo0hjW4mWo,1063
+ robosystems_client-0.2.17.dist-info/RECORD,,
@@ -1,463 +0,0 @@
- """Table Ingest Client for RoboSystems API
-
- Simplifies uploading Parquet files to staging tables and ingesting them into graphs.
- """
-
- from dataclasses import dataclass
- from io import BytesIO
- from pathlib import Path
- from typing import Dict, Any, Optional, Callable, List, Union, BinaryIO
- import json
- import logging
- import httpx
-
- from ..api.files.create_file_upload import (
-   sync_detailed as create_file_upload,
- )
- from ..api.files.update_file import (
-   sync_detailed as update_file,
- )
- from ..api.tables.list_tables import (
-   sync_detailed as list_tables,
- )
- from ..api.materialization.materialize_graph import (
-   sync_detailed as materialize_graph,
- )
- from ..models.file_upload_request import FileUploadRequest
- from ..models.file_status_update import FileStatusUpdate
- from ..models.materialize_request import MaterializeRequest
-
- logger = logging.getLogger(__name__)
-
-
- @dataclass
- class UploadOptions:
-   """Options for file upload operations"""
-
-   on_progress: Optional[Callable[[str], None]] = None
-   fix_localstack_url: bool = True  # Auto-fix LocalStack URLs for localhost
-   file_name: Optional[str] = None  # Override file name (useful for buffer uploads)
-
-
- @dataclass
- class IngestOptions:
-   """Options for table ingestion operations"""
-
-   ignore_errors: bool = True
-   rebuild: bool = False
-   on_progress: Optional[Callable[[str], None]] = None
-
-
- @dataclass
- class UploadResult:
-   """Result from file upload operation"""
-
-   file_id: str
-   file_size: int
-   row_count: int
-   table_name: str
-   file_name: str
-   success: bool = True
-   error: Optional[str] = None
-
-
- @dataclass
- class TableInfo:
-   """Information about a staging table"""
-
-   table_name: str
-   row_count: int
-   file_count: int
-   total_size_bytes: int
-
-
- class TableIngestClient:
-   """Enhanced table ingest client with simplified upload workflow"""
-
-   def __init__(self, config: Dict[str, Any]):
-     self.config = config
-     self.base_url = config["base_url"]
-     self.headers = config.get("headers", {})
-     self.token = config.get("token")
-     # Create httpx client for S3 uploads
-     self._http_client = httpx.Client(timeout=120.0)
-
-   def upload_parquet_file(
-     self,
-     graph_id: str,
-     table_name: str,
-     file_or_buffer: Union[Path, str, BytesIO, BinaryIO],
-     options: Optional[UploadOptions] = None,
-   ) -> UploadResult:
-     """
-     Upload a Parquet file to a staging table.
-
-     This method handles the complete 3-step upload process:
-     1. Get presigned upload URL
-     2. Upload file to S3
-     3. Mark file as 'uploaded' (backend validates, calculates size/row count)
-
-     Args:
-       graph_id: The graph ID
-       table_name: Name of the staging table
-       file_or_buffer: Path to the Parquet file or BytesIO/BinaryIO buffer
-       options: Upload options
-
-     Returns:
-       UploadResult with upload details (size/row count calculated by backend)
-     """
-     if options is None:
-       options = UploadOptions()
-
-     # Auto-detect if this is a file path or buffer
-     is_buffer = isinstance(file_or_buffer, (BytesIO, BinaryIO)) or hasattr(
-       file_or_buffer, "read"
-     )
-
-     # Initialize file_path for type checking
-     file_path: Optional[Path] = None
-
-     if is_buffer:
-       # Handle buffer upload
-       file_name = options.file_name or "data.parquet"
-     else:
-       # Handle file path upload
-       file_path = Path(file_or_buffer)
-       file_name = file_path.name
-       if not file_path.exists():
-         return UploadResult(
-           file_id="",
-           file_size=0,
-           row_count=0,
-           table_name=table_name,
-           file_name=file_name,
-           success=False,
-           error=f"File not found: {file_path}",
-         )
-
-     try:
-       # Import client here to avoid circular imports
-       from ..client import AuthenticatedClient
-
-       # Create authenticated client with X-API-Key
-       # The token is extracted from X-API-Key header in extensions.py
-       if not self.token:
-         return UploadResult(
-           file_id="",
-           file_size=0,
-           row_count=0,
-           table_name=table_name,
-           file_name=file_name,
-           success=False,
-           error="No API key provided. Set X-API-Key in headers.",
-         )
-
-       client = AuthenticatedClient(
-         base_url=self.base_url,
-         token=self.token,
-         prefix="",  # No prefix for X-API-Key
-         auth_header_name="X-API-Key",  # Use X-API-Key header instead of Authorization
-         headers=self.headers,
-       )
-
-       # Step 1: Get presigned upload URL
-       if options.on_progress:
-         options.on_progress(
-           f"Getting upload URL for {file_name} -> table '{table_name}'..."
-         )
-
-       upload_request = FileUploadRequest(
-         file_name=file_name, content_type="application/x-parquet", table_name=table_name
-       )
-
-       kwargs = {
-         "graph_id": graph_id,
-         "client": client,
-         "body": upload_request,
-       }
-
-       response = create_file_upload(**kwargs)
-
-       if not response.parsed:
-         error_msg = f"Failed to get upload URL (status: {response.status_code})"
-         if hasattr(response, "content"):
-           try:
-             error_detail = json.loads(response.content)
-             error_msg = f"{error_msg}: {error_detail}"
-           except (json.JSONDecodeError, ValueError):
-             error_msg = f"{error_msg}: {response.content[:200]}"
-
-         return UploadResult(
-           file_id="",
-           file_size=0,
-           row_count=0,
-           table_name=table_name,
-           file_name=file_name,
-           success=False,
-           error=error_msg,
-         )
-
-       upload_url = response.parsed.upload_url
-       file_id = response.parsed.file_id
-
-       # Fix LocalStack URL if needed
-       if options.fix_localstack_url and "localstack:4566" in upload_url:
-         upload_url = upload_url.replace("localstack:4566", "localhost:4566")
-         logger.debug("Fixed LocalStack URL for localhost access")
-
-       # Step 2: Upload file to S3
-       if options.on_progress:
-         options.on_progress(f"Uploading {file_name} to S3...")
-
-       # Read file content - handle both paths and buffers
-       if is_buffer:
-         # Read from buffer
-         if hasattr(file_or_buffer, "getvalue"):
-           file_content = file_or_buffer.getvalue()
-         else:
-           # BinaryIO or file-like object
-           file_or_buffer.seek(0)
-           file_content = file_or_buffer.read()
-       else:
-         # Read from file path
-         if file_path is None:
-           raise ValueError("file_path should not be None when not using buffer")
-         with open(file_path, "rb") as f:
-           file_content = f.read()
-
-       s3_response = self._http_client.put(
-         upload_url,
-         content=file_content,
-         headers={"Content-Type": "application/x-parquet"},
-       )
-       s3_response.raise_for_status()
-
-       # Step 3: Mark file as uploaded (backend validates and calculates size/row count)
-       if options.on_progress:
-         options.on_progress(f"Marking {file_name} as uploaded...")
-
-       status_update = FileStatusUpdate(status="uploaded")
-
-       kwargs = {
-         "graph_id": graph_id,
-         "file_id": file_id,
-         "client": client,
-         "body": status_update,
-       }
-
-       update_response = update_file(**kwargs)
-
-       if not update_response.parsed:
-         logger.error(
-           f"No parsed response from update_file. Status code: {update_response.status_code}"
-         )
-         return UploadResult(
-           file_id=file_id,
-           file_size=0,
-           row_count=0,
-           table_name=table_name,
-           file_name=file_name,
-           success=False,
-           error="Failed to complete file upload",
-         )
-
-       response_data = update_response.parsed
-
-       if isinstance(response_data, dict):
-         file_size = response_data.get("file_size_bytes", 0)
-         row_count = response_data.get("row_count", 0)
-       elif hasattr(response_data, "additional_properties"):
-         file_size = response_data.additional_properties.get("file_size_bytes", 0)
-         row_count = response_data.additional_properties.get("row_count", 0)
-       else:
-         file_size = getattr(response_data, "file_size_bytes", 0)
-         row_count = getattr(response_data, "row_count", 0)
-
-       if options.on_progress:
-         options.on_progress(
-           f"✅ Uploaded {file_name} ({file_size:,} bytes, {row_count:,} rows)"
-         )
-
-       return UploadResult(
-         file_id=file_id,
-         file_size=file_size,
-         row_count=row_count,
-         table_name=table_name,
-         file_name=file_name,
-         success=True,
-       )
-
-     except Exception as e:
-       logger.error(f"Upload failed for {file_name}: {e}")
-       return UploadResult(
-         file_id="",
-         file_size=0,
-         row_count=0,
-         table_name=table_name,
-         file_name=file_name,
-         success=False,
-         error=str(e),
-       )
-
-   def list_staging_tables(self, graph_id: str) -> List[TableInfo]:
-     """
-     List all staging tables in a graph.
-
-     Args:
-       graph_id: The graph ID
-
-     Returns:
-       List of TableInfo objects
-     """
-     try:
-       from ..client import AuthenticatedClient
-
-       if not self.token:
-         logger.error("No API key provided")
-         return []
-
-       client = AuthenticatedClient(
-         base_url=self.base_url,
-         token=self.token,
-         prefix="",
-         auth_header_name="X-API-Key",
-         headers=self.headers,
-       )
-
-       kwargs = {"graph_id": graph_id, "client": client}
-
-       response = list_tables(**kwargs)
-
-       if not response.parsed:
-         logger.error("Failed to list tables")
-         return []
-
-       tables = []
-       for table_data in response.parsed.tables:
-         tables.append(
-           TableInfo(
-             table_name=table_data.table_name,
-             row_count=table_data.row_count,
-             file_count=table_data.file_count,
-             total_size_bytes=table_data.total_size_bytes,
-           )
-         )
-
-       return tables
-
-     except Exception as e:
-       logger.error(f"Failed to list tables: {e}")
-       return []
-
-   def ingest_all_tables(
-     self, graph_id: str, options: Optional[IngestOptions] = None
-   ) -> Dict[str, Any]:
-     """
-     Materialize the graph from all staging tables.
-
-     This rebuilds the complete graph database from the current state of DuckDB staging tables.
-
-     Args:
-       graph_id: The graph ID
-       options: Ingest options
-
-     Returns:
-       Dictionary with materialization results
-     """
-     if options is None:
-       options = IngestOptions()
-
-     try:
-       from ..client import AuthenticatedClient
-
-       if not self.token:
-         return {"success": False, "error": "No API key provided"}
-
-       client = AuthenticatedClient(
-         base_url=self.base_url,
-         token=self.token,
-         prefix="",
-         auth_header_name="X-API-Key",
-         headers=self.headers,
-       )
-
-       if options.on_progress:
-         options.on_progress("Starting table materialization...")
-
-       materialize_request = MaterializeRequest(
-         ignore_errors=options.ignore_errors, rebuild=options.rebuild, force=True
-       )
-
-       kwargs = {
-         "graph_id": graph_id,
-         "client": client,
-         "body": materialize_request,
-       }
-
-       response = materialize_graph(**kwargs)
-
-       if not response.parsed:
-         return {"success": False, "error": "Failed to materialize graph"}
-
-       result = {
-         "success": True,
-         "operation_id": getattr(response.parsed, "operation_id", None),
-         "message": getattr(response.parsed, "message", "Materialization started"),
-       }
-
-       if options.on_progress:
-         options.on_progress("✅ Graph materialization completed")
-
-       return result
-
-     except Exception as e:
-       logger.error(f"Failed to materialize graph: {e}")
-       return {"success": False, "error": str(e)}
-
-   def upload_and_ingest(
-     self,
-     graph_id: str,
-     table_name: str,
-     file_path: Path,
-     upload_options: Optional[UploadOptions] = None,
-     ingest_options: Optional[IngestOptions] = None,
-   ) -> Dict[str, Any]:
-     """
-     Convenience method to upload a file and immediately ingest it.
-
-     Args:
-       graph_id: The graph ID
-       table_name: Name of the staging table
-       file_path: Path to the Parquet file
-       upload_options: Upload options
-       ingest_options: Ingest options
-
-     Returns:
-       Dictionary with upload and ingest results
-     """
-     # Upload the file
-     upload_result = self.upload_parquet_file(
-       graph_id, table_name, file_path, upload_options
-     )
-
-     if not upload_result.success:
-       return {
-         "success": False,
-         "upload": upload_result,
-         "ingest": None,
-         "error": upload_result.error,
-       }
-
-     # Ingest the table
-     ingest_result = self.ingest_all_tables(graph_id, ingest_options)
-
-     return {
-       "success": upload_result.success and ingest_result.get("success", False),
-       "upload": upload_result,
-       "ingest": ingest_result,
-     }
-
-   def close(self):
-     """Close HTTP client connections"""
-     if self._http_client:
-       self._http_client.close()