robosystems-client 0.1.19__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of robosystems-client might be problematic. Click here for more details.
- robosystems_client/api/agent/auto_select_agent.py +9 -3
- robosystems_client/api/agent/batch_process_queries.py +8 -3
- robosystems_client/api/agent/execute_specific_agent.py +10 -3
- robosystems_client/api/agent/get_agent_metadata.py +3 -0
- robosystems_client/api/agent/list_agents.py +3 -0
- robosystems_client/api/agent/recommend_agent.py +3 -0
- robosystems_client/api/auth/check_password_strength.py +2 -0
- robosystems_client/api/auth/complete_sso_auth.py +3 -0
- robosystems_client/api/auth/forgot_password.py +6 -3
- robosystems_client/api/auth/generate_sso_token.py +3 -0
- robosystems_client/api/auth/get_captcha_config.py +1 -0
- robosystems_client/api/auth/get_current_auth_user.py +3 -0
- robosystems_client/api/auth/get_password_policy.py +1 -0
- robosystems_client/api/auth/login_user.py +7 -3
- robosystems_client/api/auth/logout_user.py +2 -0
- robosystems_client/api/auth/refresh_auth_session.py +3 -0
- robosystems_client/api/auth/register_user.py +11 -6
- robosystems_client/api/auth/resend_verification_email.py +8 -3
- robosystems_client/api/auth/reset_password.py +3 -0
- robosystems_client/api/auth/sso_token_exchange.py +7 -3
- robosystems_client/api/auth/validate_reset_token.py +2 -0
- robosystems_client/api/auth/verify_email.py +3 -0
- robosystems_client/api/backup/create_backup.py +13 -7
- robosystems_client/api/backup/get_backup_download_url.py +8 -3
- robosystems_client/api/backup/get_backup_stats.py +2 -0
- robosystems_client/api/backup/list_backups.py +6 -4
- robosystems_client/api/backup/restore_backup.py +27 -8
- robosystems_client/api/connections/create_connection.py +13 -7
- robosystems_client/api/connections/create_link_token.py +8 -3
- robosystems_client/api/connections/delete_connection.py +12 -7
- robosystems_client/api/connections/exchange_link_token.py +8 -3
- robosystems_client/api/connections/get_connection.py +8 -3
- robosystems_client/api/connections/get_connection_options.py +7 -3
- robosystems_client/api/connections/init_o_auth.py +2 -0
- robosystems_client/api/connections/list_connections.py +7 -3
- robosystems_client/api/connections/oauth_callback.py +9 -3
- robosystems_client/api/connections/sync_connection.py +12 -7
- robosystems_client/api/graph_analytics/get_graph_metrics.py +12 -7
- robosystems_client/api/graph_analytics/get_graph_usage_stats.py +11 -7
- robosystems_client/api/graph_billing/get_current_graph_bill.py +8 -3
- robosystems_client/api/graph_billing/get_graph_billing_history.py +8 -3
- robosystems_client/api/graph_billing/get_graph_monthly_bill.py +9 -3
- robosystems_client/api/graph_billing/get_graph_usage_details.py +9 -3
- robosystems_client/api/graph_credits/check_credit_balance.py +8 -3
- robosystems_client/api/graph_credits/check_storage_limits.py +8 -3
- robosystems_client/api/graph_credits/get_credit_summary.py +8 -3
- robosystems_client/api/graph_credits/get_storage_usage.py +7 -3
- robosystems_client/api/graph_credits/list_credit_transactions.py +8 -3
- robosystems_client/api/graph_health/get_database_health.py +8 -3
- robosystems_client/api/graph_info/get_database_info.py +8 -3
- robosystems_client/api/graph_limits/get_graph_limits.py +8 -3
- robosystems_client/api/graphs/create_graph.py +6 -4
- robosystems_client/api/graphs/get_available_extensions.py +1 -0
- robosystems_client/api/graphs/get_graphs.py +2 -0
- robosystems_client/api/graphs/select_graph.py +8 -3
- robosystems_client/api/mcp/call_mcp_tool.py +17 -7
- robosystems_client/api/mcp/list_mcp_tools.py +11 -7
- robosystems_client/api/operations/cancel_operation.py +9 -3
- robosystems_client/api/operations/get_operation_status.py +8 -3
- robosystems_client/api/operations/stream_operation_events.py +8 -3
- robosystems_client/api/query/execute_cypher_query.py +48 -15
- robosystems_client/api/schema/export_graph_schema.py +2 -0
- robosystems_client/api/schema/{get_graph_schema_info.py → get_graph_schema.py} +37 -47
- robosystems_client/api/schema/validate_schema.py +9 -4
- robosystems_client/api/service_offerings/get_service_offerings.py +2 -0
- robosystems_client/api/status/get_service_status.py +1 -0
- robosystems_client/api/subgraphs/create_subgraph.py +2 -0
- robosystems_client/api/subgraphs/delete_subgraph.py +14 -6
- robosystems_client/api/subgraphs/get_subgraph_info.py +13 -6
- robosystems_client/api/subgraphs/get_subgraph_quota.py +9 -3
- robosystems_client/api/subgraphs/list_subgraphs.py +2 -0
- robosystems_client/api/tables/delete_file_v1_graphs_graph_id_tables_files_file_id_delete.py +287 -0
- robosystems_client/api/tables/get_file_info_v1_graphs_graph_id_tables_files_file_id_get.py +283 -0
- robosystems_client/api/tables/get_upload_url_v1_graphs_graph_id_tables_table_name_files_post.py +260 -0
- robosystems_client/api/tables/ingest_tables_v1_graphs_graph_id_tables_ingest_post.py +251 -0
- robosystems_client/api/tables/list_table_files_v1_graphs_graph_id_tables_table_name_files_get.py +283 -0
- robosystems_client/api/{backup/export_backup.py → tables/list_tables_v1_graphs_graph_id_tables_get.py} +36 -36
- robosystems_client/api/{schema/list_schema_extensions.py → tables/query_tables_v1_graphs_graph_id_tables_query_post.py} +67 -43
- robosystems_client/api/tables/update_file_v1_graphs_graph_id_tables_files_file_id_patch.py +306 -0
- robosystems_client/api/user/create_user_api_key.py +2 -0
- robosystems_client/api/user/get_all_credit_summaries.py +6 -3
- robosystems_client/api/user/get_current_user.py +2 -0
- robosystems_client/api/user/list_user_api_keys.py +2 -0
- robosystems_client/api/user/revoke_user_api_key.py +7 -3
- robosystems_client/api/user/update_user.py +2 -0
- robosystems_client/api/user/update_user_api_key.py +2 -0
- robosystems_client/api/user/update_user_password.py +8 -3
- robosystems_client/api/user_analytics/get_detailed_user_analytics.py +2 -0
- robosystems_client/api/user_analytics/get_user_usage_overview.py +2 -0
- robosystems_client/api/user_limits/get_all_shared_repository_limits.py +2 -0
- robosystems_client/api/user_limits/get_shared_repository_limits.py +6 -4
- robosystems_client/api/user_limits/get_user_limits.py +3 -0
- robosystems_client/api/user_limits/get_user_usage.py +2 -0
- robosystems_client/api/user_subscriptions/cancel_shared_repository_subscription.py +11 -6
- robosystems_client/api/user_subscriptions/get_repository_credits.py +7 -3
- robosystems_client/api/user_subscriptions/get_shared_repository_credits.py +7 -3
- robosystems_client/api/user_subscriptions/get_user_shared_subscriptions.py +7 -3
- robosystems_client/api/user_subscriptions/subscribe_to_shared_repository.py +8 -3
- robosystems_client/api/user_subscriptions/upgrade_shared_repository_subscription.py +12 -6
- robosystems_client/extensions/README.md +1 -212
- robosystems_client/extensions/__init__.py +12 -28
- robosystems_client/extensions/extensions.py +3 -17
- robosystems_client/extensions/operation_client.py +12 -4
- robosystems_client/extensions/query_client.py +38 -24
- robosystems_client/extensions/sse_client.py +11 -0
- robosystems_client/extensions/table_ingest_client.py +466 -0
- robosystems_client/models/__init__.py +39 -29
- robosystems_client/models/backup_restore_request.py +1 -12
- robosystems_client/models/bulk_ingest_request.py +50 -0
- robosystems_client/models/bulk_ingest_response.py +137 -0
- robosystems_client/models/create_graph_request.py +4 -3
- robosystems_client/models/delete_file_v1_graphs_graph_id_tables_files_file_id_delete_response_delete_file_v1_graphs_graph_id_tables_files_file_id_delete.py +47 -0
- robosystems_client/models/file_update_request.py +62 -0
- robosystems_client/models/file_upload_request.py +51 -0
- robosystems_client/models/file_upload_response.py +83 -0
- robosystems_client/models/{get_graph_schema_info_response_getgraphschemainfo.py → get_file_info_v1_graphs_graph_id_tables_files_file_id_get_response_get_file_info_v1_graphs_graph_id_tables_files_file_id_get.py} +8 -5
- robosystems_client/models/{copy_response_error_details_type_0.py → get_graph_schema_response_getgraphschema.py} +5 -5
- robosystems_client/models/list_table_files_v1_graphs_graph_id_tables_table_name_files_get_response_list_table_files_v1_graphs_graph_id_tables_table_name_files_get.py +47 -0
- robosystems_client/models/table_info.py +107 -0
- robosystems_client/models/table_ingest_result.py +107 -0
- robosystems_client/models/table_list_response.py +81 -0
- robosystems_client/models/table_query_request.py +40 -0
- robosystems_client/models/table_query_response.py +92 -0
- robosystems_client/models/{list_schema_extensions_response_listschemaextensions.py → update_file_v1_graphs_graph_id_tables_files_file_id_patch_response_update_file_v1_graphs_graph_id_tables_files_file_id_patch.py} +8 -5
- {robosystems_client-0.1.19.dist-info → robosystems_client-0.2.1.dist-info}/METADATA +25 -12
- {robosystems_client-0.1.19.dist-info → robosystems_client-0.2.1.dist-info}/RECORD +129 -122
- robosystems_client/api/copy/copy_data_to_graph.py +0 -486
- robosystems_client/extensions/copy_client.py +0 -479
- robosystems_client/models/copy_response.py +0 -275
- robosystems_client/models/copy_response_status.py +0 -11
- robosystems_client/models/data_frame_copy_request.py +0 -125
- robosystems_client/models/data_frame_copy_request_format.py +0 -10
- robosystems_client/models/s3_copy_request.py +0 -378
- robosystems_client/models/s3_copy_request_file_format.py +0 -12
- robosystems_client/models/s3_copy_request_s3_url_style_type_0.py +0 -9
- robosystems_client/models/url_copy_request.py +0 -157
- robosystems_client/models/url_copy_request_file_format.py +0 -10
- robosystems_client/models/url_copy_request_headers_type_0.py +0 -44
- /robosystems_client/api/{copy → tables}/__init__.py +0 -0
- {robosystems_client-0.1.19.dist-info → robosystems_client-0.2.1.dist-info}/WHEEL +0 -0
- {robosystems_client-0.1.19.dist-info → robosystems_client-0.2.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -83,7 +83,12 @@ class OperationClient:
|
|
|
83
83
|
def monitor_operation(
|
|
84
84
|
self, operation_id: str, options: MonitorOptions = None
|
|
85
85
|
) -> OperationResult:
|
|
86
|
-
"""Monitor a single operation until completion
|
|
86
|
+
"""Monitor a single operation until completion
|
|
87
|
+
|
|
88
|
+
The SSE stream will replay all events from the beginning (from_sequence=0),
|
|
89
|
+
so even if the operation completed before we connected, we'll still receive
|
|
90
|
+
all events including the completion event.
|
|
91
|
+
"""
|
|
87
92
|
if options is None:
|
|
88
93
|
options = MonitorOptions()
|
|
89
94
|
|
|
@@ -91,8 +96,9 @@ class OperationClient:
|
|
|
91
96
|
completed = False
|
|
92
97
|
error = None
|
|
93
98
|
|
|
94
|
-
# Set up SSE connection
|
|
95
|
-
|
|
99
|
+
# Set up SSE connection with event replay from the beginning
|
|
100
|
+
# This handles the race condition where the operation may have already completed
|
|
101
|
+
sse_config = SSEConfig(base_url=self.base_url, headers=self.headers)
|
|
96
102
|
sse_client = SSEClient(sse_config)
|
|
97
103
|
|
|
98
104
|
def on_operation_started(data):
|
|
@@ -265,6 +271,8 @@ class AsyncOperationClient:
|
|
|
265
271
|
def __init__(self, config: Dict[str, Any]):
|
|
266
272
|
self.config = config
|
|
267
273
|
self.base_url = config["base_url"]
|
|
274
|
+
self.headers = config.get("headers", {})
|
|
275
|
+
self.token = config.get("token")
|
|
268
276
|
self.active_operations: Dict[str, AsyncSSEClient] = {}
|
|
269
277
|
|
|
270
278
|
async def monitor_operation(
|
|
@@ -279,7 +287,7 @@ class AsyncOperationClient:
|
|
|
279
287
|
error = None
|
|
280
288
|
|
|
281
289
|
# Set up SSE connection
|
|
282
|
-
sse_config = SSEConfig(base_url=self.base_url)
|
|
290
|
+
sse_config = SSEConfig(base_url=self.base_url, headers=self.headers)
|
|
283
291
|
sse_client = AsyncSSEClient(sse_config)
|
|
284
292
|
|
|
285
293
|
def on_operation_started(data):
|
|
@@ -98,45 +98,59 @@ class QueryClient:
|
|
|
98
98
|
)
|
|
99
99
|
|
|
100
100
|
# Execute the query through the generated client
|
|
101
|
-
from ..client import
|
|
102
|
-
|
|
103
|
-
# Create client with
|
|
104
|
-
|
|
101
|
+
from ..client import AuthenticatedClient
|
|
102
|
+
|
|
103
|
+
# Create authenticated client with X-API-Key
|
|
104
|
+
if not self.token:
|
|
105
|
+
raise Exception("No API key provided. Set X-API-Key in headers.")
|
|
106
|
+
|
|
107
|
+
client = AuthenticatedClient(
|
|
108
|
+
base_url=self.base_url,
|
|
109
|
+
token=self.token,
|
|
110
|
+
prefix="",
|
|
111
|
+
auth_header_name="X-API-Key",
|
|
112
|
+
headers=self.headers,
|
|
113
|
+
)
|
|
105
114
|
|
|
106
115
|
try:
|
|
107
116
|
kwargs = {"graph_id": graph_id, "client": client, "body": query_request}
|
|
108
|
-
# Only add token if it's a valid string
|
|
109
|
-
if self.token and isinstance(self.token, str) and self.token.strip():
|
|
110
|
-
kwargs["token"] = self.token
|
|
111
117
|
response = execute_cypher_query(**kwargs)
|
|
112
118
|
|
|
113
119
|
# Check response type and handle accordingly
|
|
114
120
|
if hasattr(response, "parsed") and response.parsed:
|
|
115
121
|
response_data = response.parsed
|
|
116
122
|
|
|
123
|
+
# Handle both dict and object responses
|
|
124
|
+
if isinstance(response_data, dict):
|
|
125
|
+
# Response is a plain dict
|
|
126
|
+
data = response_data
|
|
127
|
+
else:
|
|
128
|
+
# Response is an object with additional_properties
|
|
129
|
+
data = (
|
|
130
|
+
response_data.additional_properties
|
|
131
|
+
if hasattr(response_data, "additional_properties")
|
|
132
|
+
else response_data
|
|
133
|
+
)
|
|
134
|
+
|
|
117
135
|
# Check if this is an immediate response
|
|
118
|
-
if
|
|
136
|
+
if "data" in data and "columns" in data:
|
|
119
137
|
return QueryResult(
|
|
120
|
-
data=
|
|
121
|
-
columns=
|
|
122
|
-
row_count=
|
|
123
|
-
execution_time_ms=
|
|
138
|
+
data=data["data"],
|
|
139
|
+
columns=data["columns"],
|
|
140
|
+
row_count=data.get("row_count", len(data["data"])),
|
|
141
|
+
execution_time_ms=data.get("execution_time_ms", 0),
|
|
124
142
|
graph_id=graph_id,
|
|
125
|
-
timestamp=
|
|
143
|
+
timestamp=data.get("timestamp", datetime.now().isoformat()),
|
|
126
144
|
)
|
|
127
145
|
|
|
128
146
|
# Check if this is a queued response
|
|
129
|
-
if (
|
|
130
|
-
hasattr(response_data, "status")
|
|
131
|
-
and response_data.status == "queued"
|
|
132
|
-
and hasattr(response_data, "operation_id")
|
|
133
|
-
):
|
|
147
|
+
if data.get("status") == "queued" and "operation_id" in data:
|
|
134
148
|
queued_response = QueuedQueryResponse(
|
|
135
|
-
status=
|
|
136
|
-
operation_id=
|
|
137
|
-
queue_position=
|
|
138
|
-
estimated_wait_seconds=
|
|
139
|
-
message=
|
|
149
|
+
status=data["status"],
|
|
150
|
+
operation_id=data["operation_id"],
|
|
151
|
+
queue_position=data.get("queue_position", 0),
|
|
152
|
+
estimated_wait_seconds=data.get("estimated_wait_seconds", 0),
|
|
153
|
+
message=data.get("message", "Query queued"),
|
|
140
154
|
)
|
|
141
155
|
|
|
142
156
|
# Notify about queue status
|
|
@@ -182,7 +196,7 @@ class QueryClient:
|
|
|
182
196
|
error = None
|
|
183
197
|
|
|
184
198
|
# Set up SSE connection
|
|
185
|
-
sse_config = SSEConfig(base_url=self.base_url)
|
|
199
|
+
sse_config = SSEConfig(base_url=self.base_url, headers=self.headers)
|
|
186
200
|
self.sse_client = SSEClient(sse_config)
|
|
187
201
|
|
|
188
202
|
# Set up event handlers
|
|
@@ -124,9 +124,11 @@ class SSEClient:
|
|
|
124
124
|
|
|
125
125
|
try:
|
|
126
126
|
event_buffer = {"event": None, "data": [], "id": None, "retry": None}
|
|
127
|
+
print("[SSE DEBUG] Starting to process events...")
|
|
127
128
|
|
|
128
129
|
for line in self._response.iter_lines():
|
|
129
130
|
if self.closed:
|
|
131
|
+
print("[SSE DEBUG] Stream closed, breaking out of loop")
|
|
130
132
|
break
|
|
131
133
|
|
|
132
134
|
line = line.strip()
|
|
@@ -134,6 +136,7 @@ class SSEClient:
|
|
|
134
136
|
# Empty line indicates end of event
|
|
135
137
|
if not line:
|
|
136
138
|
if event_buffer["data"] or event_buffer["event"]:
|
|
139
|
+
print(f"[SSE DEBUG] Dispatching event: {event_buffer.get('event')}")
|
|
137
140
|
self._dispatch_event(event_buffer)
|
|
138
141
|
event_buffer = {"event": None, "data": [], "id": None, "retry": None}
|
|
139
142
|
continue
|
|
@@ -169,9 +172,13 @@ class SSEClient:
|
|
|
169
172
|
|
|
170
173
|
# Handle final event if stream ends without empty line
|
|
171
174
|
if event_buffer["data"] or event_buffer["event"]:
|
|
175
|
+
print("[SSE DEBUG] Dispatching final event after stream end")
|
|
172
176
|
self._dispatch_event(event_buffer)
|
|
173
177
|
|
|
178
|
+
print("[SSE DEBUG] Event processing loop ended")
|
|
179
|
+
|
|
174
180
|
except Exception as error:
|
|
181
|
+
print(f"[SSE DEBUG] Exception in event processing: {error}")
|
|
175
182
|
if not self.closed:
|
|
176
183
|
self.emit("error", error)
|
|
177
184
|
|
|
@@ -394,9 +401,13 @@ class AsyncSSEClient:
|
|
|
394
401
|
|
|
395
402
|
# Handle final event if stream ends without empty line
|
|
396
403
|
if event_buffer["data"] or event_buffer["event"]:
|
|
404
|
+
print("[SSE DEBUG] Dispatching final event after stream end")
|
|
397
405
|
self._dispatch_event(event_buffer)
|
|
398
406
|
|
|
407
|
+
print("[SSE DEBUG] Event processing loop ended")
|
|
408
|
+
|
|
399
409
|
except Exception as error:
|
|
410
|
+
print(f"[SSE DEBUG] Exception in event processing: {error}")
|
|
400
411
|
if not self.closed:
|
|
401
412
|
self.emit("error", error)
|
|
402
413
|
|
|
@@ -0,0 +1,466 @@
|
|
|
1
|
+
"""Table Ingest Client for RoboSystems API
|
|
2
|
+
|
|
3
|
+
Simplifies uploading Parquet files to staging tables and ingesting them into graphs.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from io import BytesIO
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Dict, Any, Optional, Callable, List, Union, BinaryIO
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
import httpx
|
|
13
|
+
|
|
14
|
+
from ..api.tables.get_upload_url_v1_graphs_graph_id_tables_table_name_files_post import (
|
|
15
|
+
sync_detailed as get_upload_url,
|
|
16
|
+
)
|
|
17
|
+
from ..api.tables.update_file_v1_graphs_graph_id_tables_files_file_id_patch import (
|
|
18
|
+
sync_detailed as update_file,
|
|
19
|
+
)
|
|
20
|
+
from ..api.tables.list_tables_v1_graphs_graph_id_tables_get import (
|
|
21
|
+
sync_detailed as list_tables,
|
|
22
|
+
)
|
|
23
|
+
from ..api.tables.ingest_tables_v1_graphs_graph_id_tables_ingest_post import (
|
|
24
|
+
sync_detailed as ingest_tables,
|
|
25
|
+
)
|
|
26
|
+
from ..models.file_upload_request import FileUploadRequest
|
|
27
|
+
from ..models.file_update_request import FileUpdateRequest
|
|
28
|
+
from ..models.bulk_ingest_request import BulkIngestRequest
|
|
29
|
+
|
|
30
|
+
logger = logging.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class UploadOptions:
    """Tunable settings for a single Parquet upload."""

    # Optional callback invoked with a human-readable status message
    # at each step of the upload.
    on_progress: Optional[Callable[[str], None]] = None
    # When true, presigned URLs pointing at "localstack:4566" are rewritten
    # to "localhost:4566" before the upload is attempted.
    fix_localstack_url: bool = True
    # File name to report when uploading from a buffer; path uploads take
    # the name from the path itself.
    file_name: Optional[str] = None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass
class IngestOptions:
    """Tunable settings for ingesting staging tables into a graph."""

    # Forwarded as `ignore_errors` on the bulk ingest request.
    ignore_errors: bool = True
    # Forwarded as `rebuild` on the bulk ingest request.
    rebuild: bool = False
    # Optional callback invoked with a human-readable status message.
    on_progress: Optional[Callable[[str], None]] = None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class UploadResult:
    """Outcome of one upload attempt, successful or not."""

    # Identifier assigned to the uploaded file (empty string when the
    # upload failed before an id was obtained).
    file_id: str
    # Number of bytes uploaded.
    file_size: int
    # Number of rows reported for the file.
    row_count: int
    # Staging table the file was uploaded to.
    table_name: str
    # Name the file was uploaded under.
    file_name: str
    # False when any step of the upload failed.
    success: bool = True
    # Failure description; None when the upload succeeded.
    error: Optional[str] = None
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@dataclass
class TableInfo:
    """Summary of one staging table as reported by the tables listing."""

    # Name of the staging table.
    table_name: str
    # Row count reported for the table.
    row_count: int
    # Number of files attached to the table.
    file_count: int
    # Combined size of the table's files, in bytes.
    total_size_bytes: int
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class TableIngestClient:
    """Upload Parquet files to staging tables and ingest them into a graph.

    The client talks to the RoboSystems API through the generated
    ``AuthenticatedClient`` (token sent in the ``X-API-Key`` header) and
    uses its own ``httpx.Client`` for the presigned upload itself.

    It can be used as a context manager so the HTTP client is closed
    automatically::

        with TableIngestClient(config) as ingest:
            ingest.upload_parquet_file(graph_id, "Entity", "entity.parquet")
    """

    def __init__(self, config: Dict[str, Any]):
        """Create the client.

        Args:
            config: Must contain ``base_url``; may contain ``headers``
                (defaults to an empty dict) and ``token``.

        Raises:
            KeyError: If ``base_url`` is missing from ``config``.
        """
        self.config = config
        self.base_url = config["base_url"]
        self.headers = config.get("headers", {})
        self.token = config.get("token")
        # Dedicated httpx client for the presigned-URL uploads
        self._http_client = httpx.Client(timeout=120.0)

    def __enter__(self) -> "TableIngestClient":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _build_client(self):
        """Return an API client authenticating via X-API-Key, or None without a token."""
        if not self.token:
            return None

        # Imported here to avoid circular imports
        from ..client import AuthenticatedClient

        return AuthenticatedClient(
            base_url=self.base_url,
            token=self.token,
            prefix="",  # No prefix for X-API-Key
            auth_header_name="X-API-Key",  # Use X-API-Key instead of Authorization
            headers=self.headers,
        )

    @staticmethod
    def _notify(callback: Optional[Callable[[str], None]], message: str) -> None:
        """Send ``message`` to ``callback`` when a callback was supplied."""
        if callback:
            callback(message)

    @staticmethod
    def _failed_upload(
        table_name: str,
        file_name: str,
        error: str,
        file_id: str = "",
        file_size: int = 0,
        row_count: int = 0,
    ) -> UploadResult:
        """Build the UploadResult describing a failed upload."""
        return UploadResult(
            file_id=file_id,
            file_size=file_size,
            row_count=row_count,
            table_name=table_name,
            file_name=file_name,
            success=False,
            error=error,
        )

    @staticmethod
    def _read_payload(file_or_buffer, is_buffer: bool) -> bytes:
        """Return the complete bytes of a file path or file-like object."""
        if not is_buffer:
            return Path(file_or_buffer).read_bytes()

        if hasattr(file_or_buffer, "getvalue"):
            # BytesIO-style buffers expose their full content directly
            return file_or_buffer.getvalue()

        # Rewind only when the stream supports it; a non-seekable stream is
        # read from its current position instead of failing on seek().
        seekable = getattr(file_or_buffer, "seekable", None)
        if hasattr(file_or_buffer, "seek") and (seekable is None or seekable()):
            file_or_buffer.seek(0)
        return file_or_buffer.read()

    @staticmethod
    def _count_rows(payload: bytes) -> int:
        """Return the Parquet row count of ``payload``.

        Only the Parquet metadata is read, from the bytes that were
        uploaded, so the source stream is never touched again. Without
        pyarrow the count is estimated from the payload size.
        """
        try:
            import pyarrow.parquet as pq
        except ImportError:
            logger.warning(
                "pyarrow not installed, row count will be estimated from file size"
            )
            # Rough estimate: ~100 bytes per row for typical data
            return len(payload) // 100

        return pq.read_metadata(BytesIO(payload)).num_rows

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def upload_parquet_file(
        self,
        graph_id: str,
        table_name: str,
        file_or_buffer: Union[Path, str, BytesIO, BinaryIO],
        options: Optional[UploadOptions] = None,
    ) -> UploadResult:
        """Upload a Parquet file to a staging table.

        Runs the three-step upload process:
          1. Request a presigned upload URL.
          2. PUT the file content to that URL.
          3. Report file size and row count back as file metadata.

        Args:
            graph_id: The graph ID.
            table_name: Name of the staging table.
            file_or_buffer: Path to the Parquet file, or a BytesIO /
                file-like object (anything with a ``read`` attribute).
            options: Upload options; defaults to ``UploadOptions()``.

        Returns:
            UploadResult describing the upload. Failures are reported via
            ``success=False`` and ``error`` rather than raised.
        """
        if options is None:
            options = UploadOptions()

        # Anything readable is treated as a buffer; everything else as a path
        is_buffer = isinstance(file_or_buffer, (BytesIO, BinaryIO)) or hasattr(
            file_or_buffer, "read"
        )

        if is_buffer:
            file_name = options.file_name or "data.parquet"
        else:
            file_path = Path(file_or_buffer)
            file_name = file_path.name
            if not file_path.exists():
                return self._failed_upload(
                    table_name, file_name, f"File not found: {file_path}"
                )

        try:
            client = self._build_client()
            if client is None:
                return self._failed_upload(
                    table_name,
                    file_name,
                    "No API key provided. Set X-API-Key in headers.",
                )

            # Step 1: Get presigned upload URL
            self._notify(
                options.on_progress,
                f"Getting upload URL for {file_name} -> table '{table_name}'...",
            )
            response = get_upload_url(
                graph_id=graph_id,
                table_name=table_name,
                client=client,
                body=FileUploadRequest(
                    file_name=file_name, content_type="application/x-parquet"
                ),
            )

            if not response.parsed:
                error_msg = f"Failed to get upload URL (status: {response.status_code})"
                if hasattr(response, "content"):
                    try:
                        error_detail = json.loads(response.content)
                        error_msg = f"{error_msg}: {error_detail}"
                    except (json.JSONDecodeError, ValueError):
                        snippet = response.content[:200]
                        if isinstance(snippet, (bytes, bytearray)):
                            # Show text rather than a b'...' repr
                            snippet = bytes(snippet).decode("utf-8", errors="replace")
                        error_msg = f"{error_msg}: {snippet}"
                return self._failed_upload(table_name, file_name, error_msg)

            upload_url = response.parsed.upload_url
            file_id = response.parsed.file_id

            # Fix LocalStack URL if needed
            if options.fix_localstack_url and "localstack:4566" in upload_url:
                upload_url = upload_url.replace("localstack:4566", "localhost:4566")
                logger.debug("Fixed LocalStack URL for localhost access")

            # Step 2: Upload file to S3
            self._notify(options.on_progress, f"Uploading {file_name} to S3...")
            file_content = self._read_payload(file_or_buffer, is_buffer)
            file_size = len(file_content)

            s3_response = self._http_client.put(
                upload_url,
                content=file_content,
                headers={"Content-Type": "application/x-parquet"},
            )
            s3_response.raise_for_status()

            # Step 3: Get row count and update file metadata
            self._notify(
                options.on_progress, f"Updating file metadata for {file_name}..."
            )
            row_count = self._count_rows(file_content)

            update_response = update_file(
                graph_id=graph_id,
                file_id=file_id,
                client=client,
                body=FileUpdateRequest(file_size_bytes=file_size, row_count=row_count),
            )

            if not update_response.parsed:
                return self._failed_upload(
                    table_name,
                    file_name,
                    "Failed to update file metadata",
                    file_id=file_id,
                    file_size=file_size,
                    row_count=row_count,
                )

            self._notify(
                options.on_progress,
                f"✅ Uploaded {file_name} ({file_size:,} bytes, {row_count:,} rows)",
            )

            return UploadResult(
                file_id=file_id,
                file_size=file_size,
                row_count=row_count,
                table_name=table_name,
                file_name=file_name,
                success=True,
            )

        except Exception as e:
            # Best-effort API: every failure is reported through the result
            logger.error("Upload failed for %s: %s", file_name, e)
            return self._failed_upload(table_name, file_name, str(e))

    def list_staging_tables(self, graph_id: str) -> List[TableInfo]:
        """List all staging tables in a graph.

        Args:
            graph_id: The graph ID.

        Returns:
            List of TableInfo objects; an empty list when no token is
            configured or the request fails (the failure is logged).
        """
        try:
            client = self._build_client()
            if client is None:
                logger.error("No API key provided")
                return []

            response = list_tables(graph_id=graph_id, client=client)

            if not response.parsed:
                logger.error("Failed to list tables")
                return []

            return [
                TableInfo(
                    table_name=table_data.table_name,
                    row_count=table_data.row_count,
                    file_count=table_data.file_count,
                    total_size_bytes=table_data.total_size_bytes,
                )
                for table_data in response.parsed.tables
            ]

        except Exception as e:
            logger.error("Failed to list tables: %s", e)
            return []

    def ingest_all_tables(
        self, graph_id: str, options: Optional[IngestOptions] = None
    ) -> Dict[str, Any]:
        """Ingest all staging tables into the graph.

        Args:
            graph_id: The graph ID.
            options: Ingest options; defaults to ``IngestOptions()``.

        Returns:
            On success ``{"success": True, "operation_id": ..., "message": ...}``;
            on failure ``{"success": False, "error": ...}``.
        """
        if options is None:
            options = IngestOptions()

        try:
            client = self._build_client()
            if client is None:
                return {"success": False, "error": "No API key provided"}

            self._notify(options.on_progress, "Starting table ingestion...")

            response = ingest_tables(
                graph_id=graph_id,
                client=client,
                body=BulkIngestRequest(
                    ignore_errors=options.ignore_errors, rebuild=options.rebuild
                ),
            )

            if not response.parsed:
                return {"success": False, "error": "Failed to ingest tables"}

            result = {
                "success": True,
                "operation_id": getattr(response.parsed, "operation_id", None),
                "message": getattr(response.parsed, "message", "Ingestion started"),
            }

            self._notify(options.on_progress, "✅ Table ingestion completed")

            return result

        except Exception as e:
            logger.error("Failed to ingest tables: %s", e)
            return {"success": False, "error": str(e)}

    def upload_and_ingest(
        self,
        graph_id: str,
        table_name: str,
        file_path: Path,
        upload_options: Optional[UploadOptions] = None,
        ingest_options: Optional[IngestOptions] = None,
    ) -> Dict[str, Any]:
        """Upload a file and immediately ingest the staging tables.

        The ingest step goes through ``ingest_all_tables``, so it covers
        every staging table in the graph, not only ``table_name``.

        Args:
            graph_id: The graph ID.
            table_name: Name of the staging table.
            file_path: Path to the Parquet file.
            upload_options: Upload options.
            ingest_options: Ingest options.

        Returns:
            Dictionary with ``success``, ``upload`` (UploadResult) and
            ``ingest`` (ingest result dict, or None when the upload failed,
            in which case ``error`` carries the upload error).
        """
        upload_result = self.upload_parquet_file(
            graph_id, table_name, file_path, upload_options
        )

        if not upload_result.success:
            return {
                "success": False,
                "upload": upload_result,
                "ingest": None,
                "error": upload_result.error,
            }

        ingest_result = self.ingest_all_tables(graph_id, ingest_options)

        return {
            "success": upload_result.success and ingest_result.get("success", False),
            "upload": upload_result,
            "ingest": ingest_result,
        }

    def close(self):
        """Close HTTP client connections"""
        if self._http_client:
            self._http_client.close()
|