robosystems-client 0.2.16__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of robosystems-client might be problematic.
- robosystems_client/extensions/__init__.py +28 -10
- robosystems_client/extensions/extensions.py +12 -3
- robosystems_client/extensions/file_client.py +380 -0
- robosystems_client/extensions/materialization_client.py +211 -0
- robosystems_client/extensions/table_client.py +161 -0
- {robosystems_client-0.2.16.dist-info → robosystems_client-0.2.17.dist-info}/METADATA +1 -1
- {robosystems_client-0.2.16.dist-info → robosystems_client-0.2.17.dist-info}/RECORD +9 -7
- robosystems_client/extensions/table_ingest_client.py +0 -463
- {robosystems_client-0.2.16.dist-info → robosystems_client-0.2.17.dist-info}/WHEEL +0 -0
- {robosystems_client-0.2.16.dist-info → robosystems_client-0.2.17.dist-info}/licenses/LICENSE +0 -0
robosystems_client/extensions/__init__.py

```diff
@@ -28,12 +28,22 @@ from .operation_client import (
   OperationProgress,
   OperationResult,
 )
-from .
+from .file_client import (
+  FileClient,
+  FileUploadOptions,
+  FileUploadResult,
+  FileInfo,
+)
+from .materialization_client import (
+  MaterializationClient,
+  MaterializationOptions,
+  MaterializationResult,
+  MaterializationStatus,
+)
+from .table_client import (
+  TableClient,
   TableInfo,
+  QueryResult as TableQueryResult,
 )
 from .graph_client import (
   GraphClient,
```

(The removed block, old lines 31–35, is truncated in the source diff; only its first fragment, `from .`, survives.)
```diff
@@ -177,12 +187,20 @@ __all__ = [
   "OperationStatus",
   "OperationProgress",
   "OperationResult",
-  #
-  "
-  "
-  "
-  "
+  # File Client
+  "FileClient",
+  "FileUploadOptions",
+  "FileUploadResult",
+  "FileInfo",
+  # Materialization Client
+  "MaterializationClient",
+  "MaterializationOptions",
+  "MaterializationResult",
+  "MaterializationStatus",
+  # Table Client
+  "TableClient",
   "TableInfo",
+  "TableQueryResult",
   # Graph Client
   "GraphClient",
   "GraphMetadata",
```

(The five removed `__all__` entries, old lines 180–184, are truncated in the source diff: a comment and four quoted names.)
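For orientation, a minimal sketch of the re-exported surface after this change. The names come from the diff above; the only assumption is that the package is importable as `robosystems_client`, per the wheel name:

```python
# Illustrative import of the names newly re-exported in 0.2.17.
from robosystems_client.extensions import (
    FileClient,
    FileUploadOptions,
    MaterializationClient,
    TableClient,
    TableQueryResult,  # re-exported alias of table_client.QueryResult
)
```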
robosystems_client/extensions/extensions.py

```diff
@@ -9,7 +9,9 @@ from typing import Dict, Any, Optional, Callable
 from .query_client import QueryClient
 from .agent_client import AgentClient
 from .operation_client import OperationClient
-from .
+from .file_client import FileClient
+from .materialization_client import MaterializationClient
+from .table_client import TableClient
 from .graph_client import GraphClient
 from .sse_client import SSEClient
 
```

(The removed import, old line 12, is truncated in the source diff.)
```diff
@@ -61,7 +63,9 @@ class RoboSystemsExtensions:
     self.query = QueryClient(self.config)
     self.agent = AgentClient(self.config)
     self.operations = OperationClient(self.config)
-    self.
+    self.files = FileClient(self.config)
+    self.materialization = MaterializationClient(self.config)
+    self.tables = TableClient(self.config)
     self.graphs = GraphClient(self.config)
 
   def monitor_operation(
```
```diff
@@ -92,7 +96,12 @@ class RoboSystemsExtensions:
     self.query.close()
     self.agent.close()
     self.operations.close_all()
-    self.
+    if hasattr(self.files, "close"):
+      self.files.close()
+    if hasattr(self.materialization, "close"):
+      self.materialization.close()
+    if hasattr(self.tables, "close"):
+      self.tables.close()
     self.graphs.close()
 
   # Convenience methods that delegate to the appropriate clients
```

(The removed line, old 95, is truncated in the source diff, as is the name of the enclosing cleanup method.)
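Taken together, the extensions.py hunks wire the three new sub-clients onto `RoboSystemsExtensions` and tear them down during cleanup. A minimal sketch, assuming an already-constructed instance (how the config and auth are built is outside this diff):

```python
ext = RoboSystemsExtensions(...)  # construction elided; not shown in this diff

ext.files            # FileClient: S3 upload lifecycle
ext.tables           # TableClient: DuckDB staging tables
ext.materialization  # MaterializationClient: DuckDB -> graph rebuilds
```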
robosystems_client/extensions/file_client.py
ADDED (+380 lines, reconstructed below as plain source)

```python
"""File Client for RoboSystems API

Manages file operations as first-class resources with multi-layer status tracking.
Files are independent entities with their own lifecycle (S3 → DuckDB → Graph).
"""

from dataclasses import dataclass
from io import BytesIO
from pathlib import Path
from typing import Dict, Any, Optional, Callable, Union, BinaryIO
import logging
import httpx

from ..api.files.create_file_upload import (
  sync_detailed as create_file_upload,
)
from ..api.files.update_file import (
  sync_detailed as update_file,
)
from ..api.files.list_files import (
  sync_detailed as list_files,
)
from ..api.files.get_file import (
  sync_detailed as get_file,
)
from ..api.files.delete_file import (
  sync_detailed as delete_file,
)
from ..models.file_upload_request import FileUploadRequest
from ..models.file_status_update import FileStatusUpdate

logger = logging.getLogger(__name__)


@dataclass
class FileUploadOptions:
  """Options for file upload operations"""

  on_progress: Optional[Callable[[str], None]] = None
  fix_localstack_url: bool = True
  ingest_to_graph: bool = False


@dataclass
class FileUploadResult:
  """Result from file upload operation"""

  file_id: str
  file_size: int
  row_count: int
  table_name: str
  file_name: str
  success: bool = True
  error: Optional[str] = None


@dataclass
class FileInfo:
  """Information about a file"""

  file_id: str
  file_name: str
  file_format: str
  size_bytes: int
  row_count: Optional[int]
  upload_status: str
  table_name: str
  created_at: Optional[str]
  uploaded_at: Optional[str]
  layers: Optional[Dict[str, Any]] = None


class FileClient:
  """Client for managing files as first-class resources"""

  def __init__(self, config: Dict[str, Any]):
    self.config = config
    self.base_url = config["base_url"]
    self.headers = config.get("headers", {})
    self.token = config.get("token")
    self._http_client = httpx.Client(timeout=120.0)

  def upload(
    self,
    graph_id: str,
    table_name: str,
    file_or_buffer: Union[Path, str, BytesIO, BinaryIO],
    options: Optional[FileUploadOptions] = None,
  ) -> FileUploadResult:
    """
    Upload a file to a table.

    This handles the complete 3-step upload process:
    1. Get presigned upload URL
    2. Upload file to S3
    3. Mark file as 'uploaded' (triggers DuckDB staging)

    Args:
      graph_id: Graph database identifier
      table_name: Table to associate file with
      file_or_buffer: File path, Path object, BytesIO, or file-like object
      options: Upload options (progress callback, LocalStack URL fix, auto-ingest)

    Returns:
      FileUploadResult with file metadata and status
    """
    options = options or FileUploadOptions()

    try:
      # Determine file name and read content
      if isinstance(file_or_buffer, (str, Path)):
        file_path = Path(file_or_buffer)
        file_name = file_path.name
        with open(file_path, "rb") as f:
          file_content = f.read()
      elif isinstance(file_or_buffer, BytesIO):
        file_name = "data.parquet"
        file_content = file_or_buffer.getvalue()
      elif hasattr(file_or_buffer, "read"):
        file_name = getattr(file_or_buffer, "name", "data.parquet")
        file_content = file_or_buffer.read()
      else:
        raise ValueError(f"Unsupported file type: {type(file_or_buffer)}")

      # Step 1: Get presigned upload URL
      if options.on_progress:
        options.on_progress(
          f"Getting upload URL for {file_name} → table '{table_name}'..."
        )

      upload_request = FileUploadRequest(
        file_name=file_name,
        content_type="application/x-parquet",
        table_name=table_name,
      )

      kwargs = {
        "graph_id": graph_id,
        "client": self.config.get("client"),
        "body": upload_request,
      }

      response = create_file_upload(**kwargs)

      if response.status_code != 200 or not response.parsed:
        error_msg = f"Failed to get upload URL: {response.status_code}"
        return FileUploadResult(
          file_id="",
          file_size=0,
          row_count=0,
          table_name=table_name,
          file_name=file_name,
          success=False,
          error=error_msg,
        )

      upload_data = response.parsed
      upload_url = upload_data.upload_url
      file_id = upload_data.file_id

      # Fix LocalStack URL if needed
      if options.fix_localstack_url and "localstack:4566" in upload_url:
        upload_url = upload_url.replace("localstack:4566", "localhost:4566")

      # Step 2: Upload file to S3
      if options.on_progress:
        options.on_progress(f"Uploading {file_name} to S3...")

      s3_response = self._http_client.put(
        upload_url,
        content=file_content,
        headers={"Content-Type": "application/x-parquet"},
      )

      if s3_response.status_code not in [200, 204]:
        return FileUploadResult(
          file_id=file_id,
          file_size=len(file_content),
          row_count=0,
          table_name=table_name,
          file_name=file_name,
          success=False,
          error=f"S3 upload failed: {s3_response.status_code}",
        )

      # Step 3: Mark file as uploaded
      if options.on_progress:
        options.on_progress(f"Marking {file_name} as uploaded...")

      status_update = FileStatusUpdate(
        status="uploaded",
        ingest_to_graph=options.ingest_to_graph,
      )

      update_kwargs = {
        "graph_id": graph_id,
        "file_id": file_id,
        "client": self.config.get("client"),
        "body": status_update,
      }

      update_response = update_file(**update_kwargs)

      if update_response.status_code != 200 or not update_response.parsed:
        return FileUploadResult(
          file_id=file_id,
          file_size=len(file_content),
          row_count=0,
          table_name=table_name,
          file_name=file_name,
          success=False,
          error="Failed to complete file upload",
        )

      # Extract metadata from response
      response_data = update_response.parsed
      actual_file_size = getattr(response_data, "file_size_bytes", len(file_content))
      actual_row_count = getattr(response_data, "row_count", 0)

      if options.on_progress:
        options.on_progress(
          f"✅ Uploaded {file_name} ({actual_file_size:,} bytes, {actual_row_count:,} rows)"
        )

      return FileUploadResult(
        file_id=file_id,
        file_size=actual_file_size,
        row_count=actual_row_count,
        table_name=table_name,
        file_name=file_name,
        success=True,
      )

    except Exception as e:
      logger.error(f"File upload failed: {e}")
      return FileUploadResult(
        file_id="",
        file_size=0,
        row_count=0,
        table_name=table_name,
        file_name=getattr(file_or_buffer, "name", "unknown"),
        success=False,
        error=str(e),
      )

  def list(
    self,
    graph_id: str,
    table_name: Optional[str] = None,
    status: Optional[str] = None,
  ) -> list[FileInfo]:
    """
    List files in a graph with optional filtering.

    Args:
      graph_id: Graph database identifier
      table_name: Optional table name filter
      status: Optional upload status filter (uploaded, pending, etc.)

    Returns:
      List of FileInfo objects
    """
    try:
      kwargs = {
        "graph_id": graph_id,
        "client": self.config.get("client"),
      }

      if table_name:
        kwargs["table_name"] = table_name
      if status:
        kwargs["status"] = status

      response = list_files(**kwargs)

      if response.status_code != 200 or not response.parsed:
        logger.error(f"Failed to list files: {response.status_code}")
        return []

      files_data = response.parsed
      files = getattr(files_data, "files", [])

      return [
        FileInfo(
          file_id=f.file_id,
          file_name=f.file_name,
          file_format=f.file_format,
          size_bytes=f.size_bytes or 0,
          row_count=f.row_count,
          upload_status=f.upload_status,
          table_name=getattr(f, "table_name", ""),
          created_at=f.created_at,
          uploaded_at=f.uploaded_at,
        )
        for f in files
      ]

    except Exception as e:
      logger.error(f"Failed to list files: {e}")
      return []

  def get(self, graph_id: str, file_id: str) -> Optional[FileInfo]:
    """
    Get detailed information about a specific file.

    Args:
      graph_id: Graph database identifier
      file_id: File ID

    Returns:
      FileInfo with multi-layer status tracking, or None if not found
    """
    try:
      kwargs = {
        "graph_id": graph_id,
        "file_id": file_id,
        "client": self.config.get("client"),
      }

      response = get_file(**kwargs)

      if response.status_code != 200 or not response.parsed:
        logger.error(f"Failed to get file {file_id}: {response.status_code}")
        return None

      file_data = response.parsed

      return FileInfo(
        file_id=file_data.file_id,
        file_name=file_data.file_name,
        file_format=file_data.file_format,
        size_bytes=file_data.size_bytes or 0,
        row_count=file_data.row_count,
        upload_status=file_data.upload_status,
        table_name=file_data.table_name or "",
        created_at=file_data.created_at,
        uploaded_at=file_data.uploaded_at,
        layers=getattr(file_data, "layers", None),
      )

    except Exception as e:
      logger.error(f"Failed to get file {file_id}: {e}")
      return None

  def delete(self, graph_id: str, file_id: str, cascade: bool = False) -> bool:
    """
    Delete a file from all layers.

    Args:
      graph_id: Graph database identifier
      file_id: File ID to delete
      cascade: If True, delete from all layers including DuckDB and graph

    Returns:
      True if deletion succeeded, False otherwise
    """
    try:
      kwargs = {
        "graph_id": graph_id,
        "file_id": file_id,
        "client": self.config.get("client"),
        "cascade": cascade,
      }

      response = delete_file(**kwargs)

      if response.status_code not in [200, 204]:
        logger.error(f"Failed to delete file {file_id}: {response.status_code}")
        return False

      return True

    except Exception as e:
      logger.error(f"Failed to delete file {file_id}: {e}")
      return False

  def __del__(self):
    """Cleanup HTTP client on deletion"""
    if hasattr(self, "_http_client"):
      self._http_client.close()
```
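A usage sketch for the new client, based only on the signatures above. The config dict keys (`base_url`, `headers`, `token`, `client`) are the ones `__init__` reads, and `config["client"]` is passed straight through to the generated endpoint functions, so it is assumed to be the package's generated authenticated client; the graph ID, table name, and file path are placeholders:

```python
from pathlib import Path

file_client = FileClient(config)  # config dict as described above

result = file_client.upload(
    graph_id="kg123",                     # placeholder
    table_name="Entity",                  # placeholder
    file_or_buffer=Path("data.parquet"),  # Path, str, BytesIO, or file-like
    options=FileUploadOptions(
        on_progress=print,      # progress messages are plain strings
        ingest_to_graph=False,  # stage to DuckDB only; materialize later
    ),
)

if result.success:
    print(result.file_id, result.file_size, result.row_count)
else:
    print("upload failed:", result.error)
```

Note the error-handling contract visible in the source: failures never raise; they come back as `FileUploadResult(success=False, error=...)`.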
robosystems_client/extensions/materialization_client.py
ADDED (+211 lines, reconstructed below as plain source)

```python
"""Materialization Client for RoboSystems API

Manages graph materialization from DuckDB staging tables.
Treats the graph database as a materialized view of the mutable DuckDB data lake.
"""

from dataclasses import dataclass
from typing import Dict, Any, Optional, Callable
import logging

from ..api.materialization.materialize_graph import (
  sync_detailed as materialize_graph,
)
from ..api.materialization.get_materialization_status import (
  sync_detailed as get_materialization_status,
)
from ..models.materialize_request import MaterializeRequest

logger = logging.getLogger(__name__)


@dataclass
class MaterializationOptions:
  """Options for graph materialization operations"""

  ignore_errors: bool = True
  rebuild: bool = False
  force: bool = False
  on_progress: Optional[Callable[[str], None]] = None


@dataclass
class MaterializationResult:
  """Result from materialization operation"""

  status: str
  was_stale: bool
  stale_reason: Optional[str]
  tables_materialized: list[str]
  total_rows: int
  execution_time_ms: float
  message: str
  success: bool = True
  error: Optional[str] = None


@dataclass
class MaterializationStatus:
  """Status information about graph materialization"""

  graph_id: str
  is_stale: bool
  stale_reason: Optional[str]
  stale_since: Optional[str]
  last_materialized_at: Optional[str]
  materialization_count: int
  hours_since_materialization: Optional[float]
  message: str


class MaterializationClient:
  """Client for managing graph materialization operations"""

  def __init__(self, config: Dict[str, Any]):
    self.config = config
    self.base_url = config["base_url"]
    self.headers = config.get("headers", {})
    self.token = config.get("token")

  def materialize(
    self,
    graph_id: str,
    options: Optional[MaterializationOptions] = None,
  ) -> MaterializationResult:
    """
    Materialize graph from DuckDB staging tables.

    Rebuilds the complete graph database from the current state of DuckDB
    staging tables. Automatically discovers all tables, materializes them in
    the correct order (nodes before relationships), and clears the staleness flag.

    Args:
      graph_id: Graph database identifier
      options: Materialization options (ignore_errors, rebuild, force)

    Returns:
      MaterializationResult with detailed execution information

    When to use:
    - After batch uploads (files uploaded with ingest_to_graph=false)
    - After cascade file deletions (graph marked stale)
    - Periodic full refresh to ensure consistency
    - Recovery from partial materialization failures
    """
    options = options or MaterializationOptions()

    try:
      if options.on_progress:
        options.on_progress("Starting graph materialization...")

      request = MaterializeRequest(
        ignore_errors=options.ignore_errors,
        rebuild=options.rebuild,
        force=options.force,
      )

      kwargs = {
        "graph_id": graph_id,
        "client": self.config.get("client"),
        "body": request,
      }

      response = materialize_graph(**kwargs)

      if response.status_code != 200 or not response.parsed:
        error_msg = f"Materialization failed: {response.status_code}"
        if hasattr(response, "content"):
          try:
            import json

            error_data = json.loads(response.content)
            error_msg = error_data.get("detail", error_msg)
          except Exception:
            pass

        return MaterializationResult(
          status="failed",
          was_stale=False,
          stale_reason=None,
          tables_materialized=[],
          total_rows=0,
          execution_time_ms=0,
          message=error_msg,
          success=False,
          error=error_msg,
        )

      result_data = response.parsed

      if options.on_progress:
        options.on_progress(
          f"✅ Materialization complete: {len(result_data.tables_materialized)} tables, "
          f"{result_data.total_rows:,} rows in {result_data.execution_time_ms:.2f}ms"
        )

      return MaterializationResult(
        status=result_data.status,
        was_stale=result_data.was_stale,
        stale_reason=result_data.stale_reason,
        tables_materialized=result_data.tables_materialized,
        total_rows=result_data.total_rows,
        execution_time_ms=result_data.execution_time_ms,
        message=result_data.message,
        success=True,
      )

    except Exception as e:
      logger.error(f"Materialization failed: {e}")
      return MaterializationResult(
        status="failed",
        was_stale=False,
        stale_reason=None,
        tables_materialized=[],
        total_rows=0,
        execution_time_ms=0,
        message=str(e),
        success=False,
        error=str(e),
      )

  def status(self, graph_id: str) -> Optional[MaterializationStatus]:
    """
    Get current materialization status for the graph.

    Shows whether the graph is stale (DuckDB has changes not yet in graph database),
    when it was last materialized, and how long since last materialization.

    Args:
      graph_id: Graph database identifier

    Returns:
      MaterializationStatus with staleness and timing information
    """
    try:
      kwargs = {
        "graph_id": graph_id,
        "client": self.config.get("client"),
      }

      response = get_materialization_status(**kwargs)

      if response.status_code != 200 or not response.parsed:
        logger.error(f"Failed to get materialization status: {response.status_code}")
        return None

      status_data = response.parsed

      return MaterializationStatus(
        graph_id=status_data.graph_id,
        is_stale=status_data.is_stale,
        stale_reason=status_data.stale_reason,
        stale_since=status_data.stale_since,
        last_materialized_at=status_data.last_materialized_at,
        materialization_count=status_data.materialization_count,
        hours_since_materialization=status_data.hours_since_materialization,
        message=status_data.message,
      )

    except Exception as e:
      logger.error(f"Failed to get materialization status: {e}")
      return None
```
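A sketch of the batch workflow the docstring describes (upload files with `ingest_to_graph=False`, then rebuild once), using only the methods above; the graph ID is a placeholder and the config dict has the same shape as for `FileClient`:

```python
mat = MaterializationClient(config)

status = mat.status("kg123")
if status and status.is_stale:
    print(f"stale since {status.stale_since}: {status.stale_reason}")
    result = mat.materialize(
        "kg123",
        MaterializationOptions(on_progress=print, ignore_errors=True),
    )
    if result.success:
        print(result.tables_materialized, result.total_rows)
```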
robosystems_client/extensions/table_client.py
ADDED (+161 lines, reconstructed below as plain source)

```python
"""Table Client for RoboSystems API

Manages DuckDB staging table operations.
Tables provide SQL-queryable staging layer before graph materialization.
"""

from dataclasses import dataclass
from typing import Dict, Any, Optional
import logging

from ..api.tables.list_tables import (
  sync_detailed as list_tables,
)
from ..api.tables.query_tables import (
  sync_detailed as query_tables,
)
from ..models.table_query_request import TableQueryRequest

logger = logging.getLogger(__name__)


@dataclass
class TableInfo:
  """Information about a DuckDB staging table"""

  table_name: str
  table_type: str
  row_count: int
  file_count: int
  total_size_bytes: int


@dataclass
class QueryResult:
  """Result from SQL query execution"""

  columns: list[str]
  rows: list[list[Any]]
  row_count: int
  execution_time_ms: float
  success: bool = True
  error: Optional[str] = None


class TableClient:
  """Client for managing DuckDB staging tables"""

  def __init__(self, config: Dict[str, Any]):
    self.config = config
    self.base_url = config["base_url"]
    self.headers = config.get("headers", {})
    self.token = config.get("token")

  def list(self, graph_id: str) -> list[TableInfo]:
    """
    List all DuckDB staging tables in a graph.

    Args:
      graph_id: Graph database identifier

    Returns:
      List of TableInfo objects with metadata
    """
    try:
      kwargs = {
        "graph_id": graph_id,
        "client": self.config.get("client"),
      }

      response = list_tables(**kwargs)

      if response.status_code != 200 or not response.parsed:
        logger.error(f"Failed to list tables: {response.status_code}")
        return []

      table_data = response.parsed
      tables = getattr(table_data, "tables", [])

      return [
        TableInfo(
          table_name=t.table_name,
          table_type=t.table_type,
          row_count=t.row_count,
          file_count=t.file_count or 0,
          total_size_bytes=t.total_size_bytes or 0,
        )
        for t in tables
      ]

    except Exception as e:
      logger.error(f"Failed to list tables: {e}")
      return []

  def query(
    self, graph_id: str, sql_query: str, limit: Optional[int] = None
  ) -> QueryResult:
    """
    Execute SQL query against DuckDB staging tables.

    Args:
      graph_id: Graph database identifier
      sql_query: SQL query to execute
      limit: Optional row limit

    Returns:
      QueryResult with columns and rows

    Example:
      >>> result = client.tables.query(
      ...   graph_id,
      ...   "SELECT * FROM Entity WHERE entity_type = 'CORPORATION'"
      ... )
      >>> for row in result.rows:
      ...   print(row)
    """
    try:
      final_query = sql_query
      if limit is not None:
        final_query = f"{sql_query.rstrip(';')} LIMIT {limit}"

      request = TableQueryRequest(sql=final_query)

      kwargs = {
        "graph_id": graph_id,
        "client": self.config.get("client"),
        "body": request,
      }

      response = query_tables(**kwargs)

      if response.status_code != 200 or not response.parsed:
        error_msg = f"Query failed: {response.status_code}"
        return QueryResult(
          columns=[],
          rows=[],
          row_count=0,
          execution_time_ms=0,
          success=False,
          error=error_msg,
        )

      result_data = response.parsed

      return QueryResult(
        columns=result_data.columns,
        rows=result_data.rows,
        row_count=len(result_data.rows),
        execution_time_ms=getattr(result_data, "execution_time_ms", 0),
        success=True,
      )

    except Exception as e:
      logger.error(f"Query failed: {e}")
      return QueryResult(
        columns=[],
        rows=[],
        row_count=0,
        execution_time_ms=0,
        success=False,
        error=str(e),
      )
```
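A short sketch of the staging-table surface; note from the source that `query()` appends `LIMIT {limit}` after stripping a trailing semicolon, so pass the base query without its own LIMIT clause. The graph ID and table name are placeholders:

```python
tables = TableClient(config)  # same config dict shape as the other clients

for t in tables.list("kg123"):
    print(t.table_name, t.table_type, t.row_count, t.file_count)

result = tables.query("kg123", "SELECT * FROM Entity;", limit=10)
if result.success:
    print(result.columns)
    for row in result.rows:
        print(row)
else:
    print("query failed:", result.error)
```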
{robosystems_client-0.2.16.dist-info → robosystems_client-0.2.17.dist-info}/RECORD

```diff
@@ -142,18 +142,20 @@ robosystems_client/api/views/__init__.py,sha256=5vd9uJWAjRqa9xzxzYkLD1yoZ12Ld_bA
 robosystems_client/api/views/create_view.py,sha256=aBjk0FsAj6A3K2iqjE7DcTLKjDRsxegFbs5RC0hmbqA,6316
 robosystems_client/api/views/save_view.py,sha256=T7PUUNvp0JIeOYWG2n8Nm4Y9hhEWV7i-Gl2Vl8Oz4Ls,9224
 robosystems_client/extensions/README.md,sha256=qfHFjdgA_J-zNXziNZE6M1MKJiwVkocBi01w_HhvzEk,16136
-robosystems_client/extensions/__init__.py,sha256=
+robosystems_client/extensions/__init__.py,sha256=eTuJQGygQTOWC51YVhJOWUWFUMLcPo7MpZ0H3GaxoR0,7076
 robosystems_client/extensions/agent_client.py,sha256=Db2C4hrakVsf6ScnBcNk6rte3Kwn4cQBEHsR_joWMTs,17750
 robosystems_client/extensions/auth_integration.py,sha256=ABOJ8aVjfHehNGNzim1iR9-Cdh7Mr22ce-WgWWeqJt0,6535
 robosystems_client/extensions/dataframe_utils.py,sha256=gK1bgkVqBF0TvWVdGQvqWrt-ur_Rw11j8uNtMoulLWE,12312
 robosystems_client/extensions/element_mapping_client.py,sha256=yuh0QPQBPM33E7r6QWWDiKm3T4TfCdbn2kvO3Jlw4Cs,18516
-robosystems_client/extensions/extensions.py,sha256=
+robosystems_client/extensions/extensions.py,sha256=7vsD3QeIKbwhC1UqNskFjsfKkg_ZO3PPDnc6TxV3PoA,6722
+robosystems_client/extensions/file_client.py,sha256=WyAp0uOGb_wVI_MS8tqg1FjrtNH5o88u4BdR1QJ3Qz0,10586
 robosystems_client/extensions/graph_client.py,sha256=OBi0xj0SLIRKLeSu_DiGt2ZakCmhggvNrMP3jdRfEgQ,10326
+robosystems_client/extensions/materialization_client.py,sha256=xKrLlNt8jR6lwhv5OW8fg7fRQL100zXcZeZ23s7Oih0,6181
 robosystems_client/extensions/operation_client.py,sha256=B1qju-wWQrnrnVJixKGgsA_KEInviwJwdlJxzm_i7P0,13359
 robosystems_client/extensions/query_client.py,sha256=cX3e8EBoTeg4Lwm6edJYRULM2UmGpfqNX3f48S8TQbE,19430
 robosystems_client/extensions/sse_client.py,sha256=XvQIq3JQ0Yiax11E7cwclhupShYOpEMURM2cYQodiz8,15058
 robosystems_client/extensions/subgraph_workspace_client.py,sha256=Ioc7FNJEKaD_kAJBeymwtFlVI-U9t47RouD5ibUHv4g,24036
-robosystems_client/extensions/
+robosystems_client/extensions/table_client.py,sha256=YXddCxHUPGKd89he2koYT2KR7pwZF4WBcbn0innLjuY,3883
 robosystems_client/extensions/token_utils.py,sha256=qCK_s1vBzRnSYwtgncPZRLJVIw3WXmzqNTWjdEEpdgs,10899
 robosystems_client/extensions/utils.py,sha256=vhmUnEsq-UEAMgNhmkqlbJg4oJj096QPiHALEHJ-y4A,16207
 robosystems_client/extensions/view_builder_client.py,sha256=E1LSiDHAvPf2IhifGOliOAwk5vJyu5PWAnr8ZnyulZM,18590
```

(The removed RECORD entries are truncated in the source diff at their hashes or paths.)
```diff
@@ -412,7 +414,7 @@ robosystems_client/models/view_axis_config_member_labels_type_0.py,sha256=kkzpHx
 robosystems_client/models/view_config.py,sha256=HQnqYjLMXRhjZLOc5ypwILriMFKuvPzu0hPQi2vyNoM,3795
 robosystems_client/models/view_source.py,sha256=h66cASj-P_-qOptKv26uAIe9PtIewU2nTs42Ls-lFFk,4098
 robosystems_client/models/view_source_type.py,sha256=KpgczHUeOinV01jdLvytZ2URKwcsRcp1doPx2D3USyw,169
-robosystems_client-0.2.
-robosystems_client-0.2.
-robosystems_client-0.2.
-robosystems_client-0.2.
+robosystems_client-0.2.17.dist-info/METADATA,sha256=jrEwiwDPG0E1MImeUOVhTJcE1rca6-k2t2THn1L4By4,3904
+robosystems_client-0.2.17.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+robosystems_client-0.2.17.dist-info/licenses/LICENSE,sha256=LjFqQPU4eQh7jAQ04SmE9eC0j74HCdXvzbo0hjW4mWo,1063
+robosystems_client-0.2.17.dist-info/RECORD,,
```

(The four removed entries, the 0.2.16 dist-info lines, are truncated in the source diff after `robosystems_client-0.2.`.)
robosystems_client/extensions/table_ingest_client.py
DELETED (-463 lines, reconstructed below as plain source)

```python
"""Table Ingest Client for RoboSystems API

Simplifies uploading Parquet files to staging tables and ingesting them into graphs.
"""

from dataclasses import dataclass
from io import BytesIO
from pathlib import Path
from typing import Dict, Any, Optional, Callable, List, Union, BinaryIO
import json
import logging
import httpx

from ..api.files.create_file_upload import (
  sync_detailed as create_file_upload,
)
from ..api.files.update_file import (
  sync_detailed as update_file,
)
from ..api.tables.list_tables import (
  sync_detailed as list_tables,
)
from ..api.materialization.materialize_graph import (
  sync_detailed as materialize_graph,
)
from ..models.file_upload_request import FileUploadRequest
from ..models.file_status_update import FileStatusUpdate
from ..models.materialize_request import MaterializeRequest

logger = logging.getLogger(__name__)


@dataclass
class UploadOptions:
  """Options for file upload operations"""

  on_progress: Optional[Callable[[str], None]] = None
  fix_localstack_url: bool = True  # Auto-fix LocalStack URLs for localhost
  file_name: Optional[str] = None  # Override file name (useful for buffer uploads)


@dataclass
class IngestOptions:
  """Options for table ingestion operations"""

  ignore_errors: bool = True
  rebuild: bool = False
  on_progress: Optional[Callable[[str], None]] = None


@dataclass
class UploadResult:
  """Result from file upload operation"""

  file_id: str
  file_size: int
  row_count: int
  table_name: str
  file_name: str
  success: bool = True
  error: Optional[str] = None


@dataclass
class TableInfo:
  """Information about a staging table"""

  table_name: str
  row_count: int
  file_count: int
  total_size_bytes: int


class TableIngestClient:
  """Enhanced table ingest client with simplified upload workflow"""

  def __init__(self, config: Dict[str, Any]):
    self.config = config
    self.base_url = config["base_url"]
    self.headers = config.get("headers", {})
    self.token = config.get("token")
    # Create httpx client for S3 uploads
    self._http_client = httpx.Client(timeout=120.0)

  def upload_parquet_file(
    self,
    graph_id: str,
    table_name: str,
    file_or_buffer: Union[Path, str, BytesIO, BinaryIO],
    options: Optional[UploadOptions] = None,
  ) -> UploadResult:
    """
    Upload a Parquet file to a staging table.

    This method handles the complete 3-step upload process:
    1. Get presigned upload URL
    2. Upload file to S3
    3. Mark file as 'uploaded' (backend validates, calculates size/row count)

    Args:
      graph_id: The graph ID
      table_name: Name of the staging table
      file_or_buffer: Path to the Parquet file or BytesIO/BinaryIO buffer
      options: Upload options

    Returns:
      UploadResult with upload details (size/row count calculated by backend)
    """
    if options is None:
      options = UploadOptions()

    # Auto-detect if this is a file path or buffer
    is_buffer = isinstance(file_or_buffer, (BytesIO, BinaryIO)) or hasattr(
      file_or_buffer, "read"
    )

    # Initialize file_path for type checking
    file_path: Optional[Path] = None

    if is_buffer:
      # Handle buffer upload
      file_name = options.file_name or "data.parquet"
    else:
      # Handle file path upload
      file_path = Path(file_or_buffer)
      file_name = file_path.name
      if not file_path.exists():
        return UploadResult(
          file_id="",
          file_size=0,
          row_count=0,
          table_name=table_name,
          file_name=file_name,
          success=False,
          error=f"File not found: {file_path}",
        )

    try:
      # Import client here to avoid circular imports
      from ..client import AuthenticatedClient

      # Create authenticated client with X-API-Key
      # The token is extracted from X-API-Key header in extensions.py
      if not self.token:
        return UploadResult(
          file_id="",
          file_size=0,
          row_count=0,
          table_name=table_name,
          file_name=file_name,
          success=False,
          error="No API key provided. Set X-API-Key in headers.",
        )

      client = AuthenticatedClient(
        base_url=self.base_url,
        token=self.token,
        prefix="",  # No prefix for X-API-Key
        auth_header_name="X-API-Key",  # Use X-API-Key header instead of Authorization
        headers=self.headers,
      )

      # Step 1: Get presigned upload URL
      if options.on_progress:
        options.on_progress(
          f"Getting upload URL for {file_name} -> table '{table_name}'..."
        )

      upload_request = FileUploadRequest(
        file_name=file_name, content_type="application/x-parquet", table_name=table_name
      )

      kwargs = {
        "graph_id": graph_id,
        "client": client,
        "body": upload_request,
      }

      response = create_file_upload(**kwargs)

      if not response.parsed:
        error_msg = f"Failed to get upload URL (status: {response.status_code})"
        if hasattr(response, "content"):
          try:
            error_detail = json.loads(response.content)
            error_msg = f"{error_msg}: {error_detail}"
          except (json.JSONDecodeError, ValueError):
            error_msg = f"{error_msg}: {response.content[:200]}"

        return UploadResult(
          file_id="",
          file_size=0,
          row_count=0,
          table_name=table_name,
          file_name=file_name,
          success=False,
          error=error_msg,
        )

      upload_url = response.parsed.upload_url
      file_id = response.parsed.file_id

      # Fix LocalStack URL if needed
      if options.fix_localstack_url and "localstack:4566" in upload_url:
        upload_url = upload_url.replace("localstack:4566", "localhost:4566")
        logger.debug("Fixed LocalStack URL for localhost access")

      # Step 2: Upload file to S3
      if options.on_progress:
        options.on_progress(f"Uploading {file_name} to S3...")

      # Read file content - handle both paths and buffers
      if is_buffer:
        # Read from buffer
        if hasattr(file_or_buffer, "getvalue"):
          file_content = file_or_buffer.getvalue()
        else:
          # BinaryIO or file-like object
          file_or_buffer.seek(0)
          file_content = file_or_buffer.read()
      else:
        # Read from file path
        if file_path is None:
          raise ValueError("file_path should not be None when not using buffer")
        with open(file_path, "rb") as f:
          file_content = f.read()

      s3_response = self._http_client.put(
        upload_url,
        content=file_content,
        headers={"Content-Type": "application/x-parquet"},
      )
      s3_response.raise_for_status()

      # Step 3: Mark file as uploaded (backend validates and calculates size/row count)
      if options.on_progress:
        options.on_progress(f"Marking {file_name} as uploaded...")

      status_update = FileStatusUpdate(status="uploaded")

      kwargs = {
        "graph_id": graph_id,
        "file_id": file_id,
        "client": client,
        "body": status_update,
      }

      update_response = update_file(**kwargs)

      if not update_response.parsed:
        logger.error(
          f"No parsed response from update_file. Status code: {update_response.status_code}"
        )
        return UploadResult(
          file_id=file_id,
          file_size=0,
          row_count=0,
          table_name=table_name,
          file_name=file_name,
          success=False,
          error="Failed to complete file upload",
        )

      response_data = update_response.parsed

      if isinstance(response_data, dict):
        file_size = response_data.get("file_size_bytes", 0)
        row_count = response_data.get("row_count", 0)
      elif hasattr(response_data, "additional_properties"):
        file_size = response_data.additional_properties.get("file_size_bytes", 0)
        row_count = response_data.additional_properties.get("row_count", 0)
      else:
        file_size = getattr(response_data, "file_size_bytes", 0)
        row_count = getattr(response_data, "row_count", 0)

      if options.on_progress:
        options.on_progress(
          f"✅ Uploaded {file_name} ({file_size:,} bytes, {row_count:,} rows)"
        )

      return UploadResult(
        file_id=file_id,
        file_size=file_size,
        row_count=row_count,
        table_name=table_name,
        file_name=file_name,
        success=True,
      )

    except Exception as e:
      logger.error(f"Upload failed for {file_name}: {e}")
      return UploadResult(
        file_id="",
        file_size=0,
        row_count=0,
        table_name=table_name,
        file_name=file_name,
        success=False,
        error=str(e),
      )

  def list_staging_tables(self, graph_id: str) -> List[TableInfo]:
    """
    List all staging tables in a graph.

    Args:
      graph_id: The graph ID

    Returns:
      List of TableInfo objects
    """
    try:
      from ..client import AuthenticatedClient

      if not self.token:
        logger.error("No API key provided")
        return []

      client = AuthenticatedClient(
        base_url=self.base_url,
        token=self.token,
        prefix="",
        auth_header_name="X-API-Key",
        headers=self.headers,
      )

      kwargs = {"graph_id": graph_id, "client": client}

      response = list_tables(**kwargs)

      if not response.parsed:
        logger.error("Failed to list tables")
        return []

      tables = []
      for table_data in response.parsed.tables:
        tables.append(
          TableInfo(
            table_name=table_data.table_name,
            row_count=table_data.row_count,
            file_count=table_data.file_count,
            total_size_bytes=table_data.total_size_bytes,
          )
        )

      return tables

    except Exception as e:
      logger.error(f"Failed to list tables: {e}")
      return []

  def ingest_all_tables(
    self, graph_id: str, options: Optional[IngestOptions] = None
  ) -> Dict[str, Any]:
    """
    Materialize the graph from all staging tables.

    This rebuilds the complete graph database from the current state of DuckDB staging tables.

    Args:
      graph_id: The graph ID
      options: Ingest options

    Returns:
      Dictionary with materialization results
    """
    if options is None:
      options = IngestOptions()

    try:
      from ..client import AuthenticatedClient

      if not self.token:
        return {"success": False, "error": "No API key provided"}

      client = AuthenticatedClient(
        base_url=self.base_url,
        token=self.token,
        prefix="",
        auth_header_name="X-API-Key",
        headers=self.headers,
      )

      if options.on_progress:
        options.on_progress("Starting table materialization...")

      materialize_request = MaterializeRequest(
        ignore_errors=options.ignore_errors, rebuild=options.rebuild, force=True
      )

      kwargs = {
        "graph_id": graph_id,
        "client": client,
        "body": materialize_request,
      }

      response = materialize_graph(**kwargs)

      if not response.parsed:
        return {"success": False, "error": "Failed to materialize graph"}

      result = {
        "success": True,
        "operation_id": getattr(response.parsed, "operation_id", None),
        "message": getattr(response.parsed, "message", "Materialization started"),
      }

      if options.on_progress:
        options.on_progress("✅ Graph materialization completed")

      return result

    except Exception as e:
      logger.error(f"Failed to materialize graph: {e}")
      return {"success": False, "error": str(e)}

  def upload_and_ingest(
    self,
    graph_id: str,
    table_name: str,
    file_path: Path,
    upload_options: Optional[UploadOptions] = None,
    ingest_options: Optional[IngestOptions] = None,
  ) -> Dict[str, Any]:
    """
    Convenience method to upload a file and immediately ingest it.

    Args:
      graph_id: The graph ID
      table_name: Name of the staging table
      file_path: Path to the Parquet file
      upload_options: Upload options
      ingest_options: Ingest options

    Returns:
      Dictionary with upload and ingest results
    """
    # Upload the file
    upload_result = self.upload_parquet_file(
      graph_id, table_name, file_path, upload_options
    )

    if not upload_result.success:
      return {
        "success": False,
        "upload": upload_result,
        "ingest": None,
        "error": upload_result.error,
      }

    # Ingest the table
    ingest_result = self.ingest_all_tables(graph_id, ingest_options)

    return {
      "success": upload_result.success and ingest_result.get("success", False),
      "upload": upload_result,
      "ingest": ingest_result,
    }

  def close(self):
    """Close HTTP client connections"""
    if self._http_client:
      self._http_client.close()
```
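For callers migrating off the deleted client, the old methods map roughly onto the three new clients. This map is an inference from comparing the two versions, not documented guidance, and the attribute the old client was bound to on `RoboSystemsExtensions` is truncated in the diff:

```python
# TableIngestClient.upload_parquet_file(graph_id, table, file, opts)
#   -> FileClient.upload(graph_id, table, file, FileUploadOptions(...))
# TableIngestClient.list_staging_tables(graph_id)
#   -> TableClient.list(graph_id)
# TableIngestClient.ingest_all_tables(graph_id, opts)
#   -> MaterializationClient.materialize(graph_id, MaterializationOptions(...))
# TableIngestClient.upload_and_ingest(graph_id, table, path)
#   -> FileClient.upload(..., FileUploadOptions(ingest_to_graph=True)),
#      or an upload followed by an explicit materialize()
```

One behavioral difference visible in the sources: the old client constructed its own `AuthenticatedClient` with an `X-API-Key` header on every call, while the new clients pass `config["client"]` through to the generated endpoint functions.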
{robosystems_client-0.2.16.dist-info → robosystems_client-0.2.17.dist-info}/WHEEL
RENAMED
File without changes

{robosystems_client-0.2.16.dist-info → robosystems_client-0.2.17.dist-info}/licenses/LICENSE
RENAMED
File without changes