robosystems-client 0.1.19__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of robosystems-client has been flagged as potentially problematic; consult the package registry's advisory page for details.

Files changed (141):
  1. robosystems_client/api/agent/auto_select_agent.py +9 -3
  2. robosystems_client/api/agent/batch_process_queries.py +8 -3
  3. robosystems_client/api/agent/execute_specific_agent.py +10 -3
  4. robosystems_client/api/agent/get_agent_metadata.py +3 -0
  5. robosystems_client/api/agent/list_agents.py +3 -0
  6. robosystems_client/api/agent/recommend_agent.py +3 -0
  7. robosystems_client/api/auth/check_password_strength.py +2 -0
  8. robosystems_client/api/auth/complete_sso_auth.py +3 -0
  9. robosystems_client/api/auth/forgot_password.py +6 -3
  10. robosystems_client/api/auth/generate_sso_token.py +3 -0
  11. robosystems_client/api/auth/get_captcha_config.py +1 -0
  12. robosystems_client/api/auth/get_current_auth_user.py +3 -0
  13. robosystems_client/api/auth/get_password_policy.py +1 -0
  14. robosystems_client/api/auth/login_user.py +7 -3
  15. robosystems_client/api/auth/logout_user.py +2 -0
  16. robosystems_client/api/auth/refresh_auth_session.py +3 -0
  17. robosystems_client/api/auth/register_user.py +11 -6
  18. robosystems_client/api/auth/resend_verification_email.py +8 -3
  19. robosystems_client/api/auth/reset_password.py +3 -0
  20. robosystems_client/api/auth/sso_token_exchange.py +7 -3
  21. robosystems_client/api/auth/validate_reset_token.py +2 -0
  22. robosystems_client/api/auth/verify_email.py +3 -0
  23. robosystems_client/api/backup/create_backup.py +13 -7
  24. robosystems_client/api/backup/get_backup_download_url.py +8 -3
  25. robosystems_client/api/backup/get_backup_stats.py +2 -0
  26. robosystems_client/api/backup/list_backups.py +6 -4
  27. robosystems_client/api/backup/restore_backup.py +27 -8
  28. robosystems_client/api/connections/create_connection.py +13 -7
  29. robosystems_client/api/connections/create_link_token.py +8 -3
  30. robosystems_client/api/connections/delete_connection.py +12 -7
  31. robosystems_client/api/connections/exchange_link_token.py +8 -3
  32. robosystems_client/api/connections/get_connection.py +8 -3
  33. robosystems_client/api/connections/get_connection_options.py +7 -3
  34. robosystems_client/api/connections/init_o_auth.py +2 -0
  35. robosystems_client/api/connections/list_connections.py +7 -3
  36. robosystems_client/api/connections/oauth_callback.py +9 -3
  37. robosystems_client/api/connections/sync_connection.py +12 -7
  38. robosystems_client/api/graph_analytics/get_graph_metrics.py +12 -7
  39. robosystems_client/api/graph_analytics/get_graph_usage_stats.py +11 -7
  40. robosystems_client/api/graph_billing/get_current_graph_bill.py +8 -3
  41. robosystems_client/api/graph_billing/get_graph_billing_history.py +8 -3
  42. robosystems_client/api/graph_billing/get_graph_monthly_bill.py +9 -3
  43. robosystems_client/api/graph_billing/get_graph_usage_details.py +9 -3
  44. robosystems_client/api/graph_credits/check_credit_balance.py +8 -3
  45. robosystems_client/api/graph_credits/check_storage_limits.py +8 -3
  46. robosystems_client/api/graph_credits/get_credit_summary.py +8 -3
  47. robosystems_client/api/graph_credits/get_storage_usage.py +7 -3
  48. robosystems_client/api/graph_credits/list_credit_transactions.py +8 -3
  49. robosystems_client/api/graph_health/get_database_health.py +8 -3
  50. robosystems_client/api/graph_info/get_database_info.py +8 -3
  51. robosystems_client/api/graph_limits/get_graph_limits.py +8 -3
  52. robosystems_client/api/graphs/create_graph.py +6 -4
  53. robosystems_client/api/graphs/get_available_extensions.py +1 -0
  54. robosystems_client/api/graphs/get_graphs.py +2 -0
  55. robosystems_client/api/graphs/select_graph.py +8 -3
  56. robosystems_client/api/mcp/call_mcp_tool.py +17 -7
  57. robosystems_client/api/mcp/list_mcp_tools.py +11 -7
  58. robosystems_client/api/operations/cancel_operation.py +9 -3
  59. robosystems_client/api/operations/get_operation_status.py +8 -3
  60. robosystems_client/api/operations/stream_operation_events.py +8 -3
  61. robosystems_client/api/query/execute_cypher_query.py +48 -15
  62. robosystems_client/api/schema/export_graph_schema.py +2 -0
  63. robosystems_client/api/schema/{get_graph_schema_info.py → get_graph_schema.py} +37 -47
  64. robosystems_client/api/schema/validate_schema.py +9 -4
  65. robosystems_client/api/service_offerings/get_service_offerings.py +2 -0
  66. robosystems_client/api/status/get_service_status.py +1 -0
  67. robosystems_client/api/subgraphs/create_subgraph.py +2 -0
  68. robosystems_client/api/subgraphs/delete_subgraph.py +14 -6
  69. robosystems_client/api/subgraphs/get_subgraph_info.py +13 -6
  70. robosystems_client/api/subgraphs/get_subgraph_quota.py +9 -3
  71. robosystems_client/api/subgraphs/list_subgraphs.py +2 -0
  72. robosystems_client/api/tables/delete_file_v1_graphs_graph_id_tables_files_file_id_delete.py +287 -0
  73. robosystems_client/api/tables/get_file_info_v1_graphs_graph_id_tables_files_file_id_get.py +283 -0
  74. robosystems_client/api/tables/get_upload_url_v1_graphs_graph_id_tables_table_name_files_post.py +260 -0
  75. robosystems_client/api/tables/ingest_tables_v1_graphs_graph_id_tables_ingest_post.py +251 -0
  76. robosystems_client/api/tables/list_table_files_v1_graphs_graph_id_tables_table_name_files_get.py +283 -0
  77. robosystems_client/api/{backup/export_backup.py → tables/list_tables_v1_graphs_graph_id_tables_get.py} +36 -36
  78. robosystems_client/api/{schema/list_schema_extensions.py → tables/query_tables_v1_graphs_graph_id_tables_query_post.py} +67 -43
  79. robosystems_client/api/tables/update_file_v1_graphs_graph_id_tables_files_file_id_patch.py +306 -0
  80. robosystems_client/api/user/create_user_api_key.py +2 -0
  81. robosystems_client/api/user/get_all_credit_summaries.py +6 -3
  82. robosystems_client/api/user/get_current_user.py +2 -0
  83. robosystems_client/api/user/list_user_api_keys.py +2 -0
  84. robosystems_client/api/user/revoke_user_api_key.py +7 -3
  85. robosystems_client/api/user/update_user.py +2 -0
  86. robosystems_client/api/user/update_user_api_key.py +2 -0
  87. robosystems_client/api/user/update_user_password.py +8 -3
  88. robosystems_client/api/user_analytics/get_detailed_user_analytics.py +2 -0
  89. robosystems_client/api/user_analytics/get_user_usage_overview.py +2 -0
  90. robosystems_client/api/user_limits/get_all_shared_repository_limits.py +2 -0
  91. robosystems_client/api/user_limits/get_shared_repository_limits.py +6 -4
  92. robosystems_client/api/user_limits/get_user_limits.py +3 -0
  93. robosystems_client/api/user_limits/get_user_usage.py +2 -0
  94. robosystems_client/api/user_subscriptions/cancel_shared_repository_subscription.py +11 -6
  95. robosystems_client/api/user_subscriptions/get_repository_credits.py +7 -3
  96. robosystems_client/api/user_subscriptions/get_shared_repository_credits.py +7 -3
  97. robosystems_client/api/user_subscriptions/get_user_shared_subscriptions.py +7 -3
  98. robosystems_client/api/user_subscriptions/subscribe_to_shared_repository.py +8 -3
  99. robosystems_client/api/user_subscriptions/upgrade_shared_repository_subscription.py +12 -6
  100. robosystems_client/extensions/README.md +1 -212
  101. robosystems_client/extensions/__init__.py +12 -28
  102. robosystems_client/extensions/extensions.py +3 -17
  103. robosystems_client/extensions/operation_client.py +12 -4
  104. robosystems_client/extensions/query_client.py +38 -24
  105. robosystems_client/extensions/sse_client.py +11 -0
  106. robosystems_client/extensions/table_ingest_client.py +466 -0
  107. robosystems_client/models/__init__.py +39 -29
  108. robosystems_client/models/backup_restore_request.py +1 -12
  109. robosystems_client/models/bulk_ingest_request.py +50 -0
  110. robosystems_client/models/bulk_ingest_response.py +137 -0
  111. robosystems_client/models/create_graph_request.py +4 -3
  112. robosystems_client/models/delete_file_v1_graphs_graph_id_tables_files_file_id_delete_response_delete_file_v1_graphs_graph_id_tables_files_file_id_delete.py +47 -0
  113. robosystems_client/models/file_update_request.py +62 -0
  114. robosystems_client/models/file_upload_request.py +51 -0
  115. robosystems_client/models/file_upload_response.py +83 -0
  116. robosystems_client/models/{get_graph_schema_info_response_getgraphschemainfo.py → get_file_info_v1_graphs_graph_id_tables_files_file_id_get_response_get_file_info_v1_graphs_graph_id_tables_files_file_id_get.py} +8 -5
  117. robosystems_client/models/{copy_response_error_details_type_0.py → get_graph_schema_response_getgraphschema.py} +5 -5
  118. robosystems_client/models/list_table_files_v1_graphs_graph_id_tables_table_name_files_get_response_list_table_files_v1_graphs_graph_id_tables_table_name_files_get.py +47 -0
  119. robosystems_client/models/table_info.py +107 -0
  120. robosystems_client/models/table_ingest_result.py +107 -0
  121. robosystems_client/models/table_list_response.py +81 -0
  122. robosystems_client/models/table_query_request.py +40 -0
  123. robosystems_client/models/table_query_response.py +92 -0
  124. robosystems_client/models/{list_schema_extensions_response_listschemaextensions.py → update_file_v1_graphs_graph_id_tables_files_file_id_patch_response_update_file_v1_graphs_graph_id_tables_files_file_id_patch.py} +8 -5
  125. {robosystems_client-0.1.19.dist-info → robosystems_client-0.2.0.dist-info}/METADATA +15 -3
  126. {robosystems_client-0.1.19.dist-info → robosystems_client-0.2.0.dist-info}/RECORD +129 -122
  127. robosystems_client/api/copy/copy_data_to_graph.py +0 -486
  128. robosystems_client/extensions/copy_client.py +0 -479
  129. robosystems_client/models/copy_response.py +0 -275
  130. robosystems_client/models/copy_response_status.py +0 -11
  131. robosystems_client/models/data_frame_copy_request.py +0 -125
  132. robosystems_client/models/data_frame_copy_request_format.py +0 -10
  133. robosystems_client/models/s3_copy_request.py +0 -378
  134. robosystems_client/models/s3_copy_request_file_format.py +0 -12
  135. robosystems_client/models/s3_copy_request_s3_url_style_type_0.py +0 -9
  136. robosystems_client/models/url_copy_request.py +0 -157
  137. robosystems_client/models/url_copy_request_file_format.py +0 -10
  138. robosystems_client/models/url_copy_request_headers_type_0.py +0 -44
  139. /robosystems_client/api/{copy → tables}/__init__.py +0 -0
  140. {robosystems_client-0.1.19.dist-info → robosystems_client-0.2.0.dist-info}/WHEEL +0 -0
  141. {robosystems_client-0.1.19.dist-info → robosystems_client-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -83,7 +83,12 @@ class OperationClient:
83
83
  def monitor_operation(
84
84
  self, operation_id: str, options: MonitorOptions = None
85
85
  ) -> OperationResult:
86
- """Monitor a single operation until completion"""
86
+ """Monitor a single operation until completion
87
+
88
+ The SSE stream will replay all events from the beginning (from_sequence=0),
89
+ so even if the operation completed before we connected, we'll still receive
90
+ all events including the completion event.
91
+ """
87
92
  if options is None:
88
93
  options = MonitorOptions()
89
94
 
@@ -91,8 +96,9 @@ class OperationClient:
91
96
  completed = False
92
97
  error = None
93
98
 
94
- # Set up SSE connection
95
- sse_config = SSEConfig(base_url=self.base_url)
99
+ # Set up SSE connection with event replay from the beginning
100
+ # This handles the race condition where the operation may have already completed
101
+ sse_config = SSEConfig(base_url=self.base_url, headers=self.headers)
96
102
  sse_client = SSEClient(sse_config)
97
103
 
98
104
  def on_operation_started(data):
@@ -265,6 +271,8 @@ class AsyncOperationClient:
265
271
  def __init__(self, config: Dict[str, Any]):
266
272
  self.config = config
267
273
  self.base_url = config["base_url"]
274
+ self.headers = config.get("headers", {})
275
+ self.token = config.get("token")
268
276
  self.active_operations: Dict[str, AsyncSSEClient] = {}
269
277
 
270
278
  async def monitor_operation(
@@ -279,7 +287,7 @@ class AsyncOperationClient:
279
287
  error = None
280
288
 
281
289
  # Set up SSE connection
282
- sse_config = SSEConfig(base_url=self.base_url)
290
+ sse_config = SSEConfig(base_url=self.base_url, headers=self.headers)
283
291
  sse_client = AsyncSSEClient(sse_config)
284
292
 
285
293
  def on_operation_started(data):
@@ -98,45 +98,59 @@ class QueryClient:
98
98
  )
99
99
 
100
100
  # Execute the query through the generated client
101
- from ..client import Client
102
-
103
- # Create client with headers
104
- client = Client(base_url=self.base_url, headers=self.headers)
101
+ from ..client import AuthenticatedClient
102
+
103
+ # Create authenticated client with X-API-Key
104
+ if not self.token:
105
+ raise Exception("No API key provided. Set X-API-Key in headers.")
106
+
107
+ client = AuthenticatedClient(
108
+ base_url=self.base_url,
109
+ token=self.token,
110
+ prefix="",
111
+ auth_header_name="X-API-Key",
112
+ headers=self.headers,
113
+ )
105
114
 
106
115
  try:
107
116
  kwargs = {"graph_id": graph_id, "client": client, "body": query_request}
108
- # Only add token if it's a valid string
109
- if self.token and isinstance(self.token, str) and self.token.strip():
110
- kwargs["token"] = self.token
111
117
  response = execute_cypher_query(**kwargs)
112
118
 
113
119
  # Check response type and handle accordingly
114
120
  if hasattr(response, "parsed") and response.parsed:
115
121
  response_data = response.parsed
116
122
 
123
+ # Handle both dict and object responses
124
+ if isinstance(response_data, dict):
125
+ # Response is a plain dict
126
+ data = response_data
127
+ else:
128
+ # Response is an object with additional_properties
129
+ data = (
130
+ response_data.additional_properties
131
+ if hasattr(response_data, "additional_properties")
132
+ else response_data
133
+ )
134
+
117
135
  # Check if this is an immediate response
118
- if hasattr(response_data, "data") and hasattr(response_data, "columns"):
136
+ if "data" in data and "columns" in data:
119
137
  return QueryResult(
120
- data=response_data.data,
121
- columns=response_data.columns,
122
- row_count=getattr(response_data, "row_count", len(response_data.data)),
123
- execution_time_ms=getattr(response_data, "execution_time_ms", 0),
138
+ data=data["data"],
139
+ columns=data["columns"],
140
+ row_count=data.get("row_count", len(data["data"])),
141
+ execution_time_ms=data.get("execution_time_ms", 0),
124
142
  graph_id=graph_id,
125
- timestamp=getattr(response_data, "timestamp", datetime.now().isoformat()),
143
+ timestamp=data.get("timestamp", datetime.now().isoformat()),
126
144
  )
127
145
 
128
146
  # Check if this is a queued response
129
- if (
130
- hasattr(response_data, "status")
131
- and response_data.status == "queued"
132
- and hasattr(response_data, "operation_id")
133
- ):
147
+ if data.get("status") == "queued" and "operation_id" in data:
134
148
  queued_response = QueuedQueryResponse(
135
- status=response_data.status,
136
- operation_id=response_data.operation_id,
137
- queue_position=getattr(response_data, "queue_position", 0),
138
- estimated_wait_seconds=getattr(response_data, "estimated_wait_seconds", 0),
139
- message=getattr(response_data, "message", "Query queued"),
149
+ status=data["status"],
150
+ operation_id=data["operation_id"],
151
+ queue_position=data.get("queue_position", 0),
152
+ estimated_wait_seconds=data.get("estimated_wait_seconds", 0),
153
+ message=data.get("message", "Query queued"),
140
154
  )
141
155
 
142
156
  # Notify about queue status
@@ -182,7 +196,7 @@ class QueryClient:
182
196
  error = None
183
197
 
184
198
  # Set up SSE connection
185
- sse_config = SSEConfig(base_url=self.base_url)
199
+ sse_config = SSEConfig(base_url=self.base_url, headers=self.headers)
186
200
  self.sse_client = SSEClient(sse_config)
187
201
 
188
202
  # Set up event handlers
@@ -124,9 +124,11 @@ class SSEClient:
124
124
 
125
125
  try:
126
126
  event_buffer = {"event": None, "data": [], "id": None, "retry": None}
127
+ print("[SSE DEBUG] Starting to process events...")
127
128
 
128
129
  for line in self._response.iter_lines():
129
130
  if self.closed:
131
+ print("[SSE DEBUG] Stream closed, breaking out of loop")
130
132
  break
131
133
 
132
134
  line = line.strip()
@@ -134,6 +136,7 @@ class SSEClient:
134
136
  # Empty line indicates end of event
135
137
  if not line:
136
138
  if event_buffer["data"] or event_buffer["event"]:
139
+ print(f"[SSE DEBUG] Dispatching event: {event_buffer.get('event')}")
137
140
  self._dispatch_event(event_buffer)
138
141
  event_buffer = {"event": None, "data": [], "id": None, "retry": None}
139
142
  continue
@@ -169,9 +172,13 @@ class SSEClient:
169
172
 
170
173
  # Handle final event if stream ends without empty line
171
174
  if event_buffer["data"] or event_buffer["event"]:
175
+ print("[SSE DEBUG] Dispatching final event after stream end")
172
176
  self._dispatch_event(event_buffer)
173
177
 
178
+ print("[SSE DEBUG] Event processing loop ended")
179
+
174
180
  except Exception as error:
181
+ print(f"[SSE DEBUG] Exception in event processing: {error}")
175
182
  if not self.closed:
176
183
  self.emit("error", error)
177
184
 
@@ -394,9 +401,13 @@ class AsyncSSEClient:
394
401
 
395
402
  # Handle final event if stream ends without empty line
396
403
  if event_buffer["data"] or event_buffer["event"]:
404
+ print("[SSE DEBUG] Dispatching final event after stream end")
397
405
  self._dispatch_event(event_buffer)
398
406
 
407
+ print("[SSE DEBUG] Event processing loop ended")
408
+
399
409
  except Exception as error:
410
+ print(f"[SSE DEBUG] Exception in event processing: {error}")
400
411
  if not self.closed:
401
412
  self.emit("error", error)
402
413
 
@@ -0,0 +1,466 @@
1
+ """Table Ingest Client for RoboSystems API
2
+
3
+ Simplifies uploading Parquet files to staging tables and ingesting them into graphs.
4
+ """
5
+
6
+ from dataclasses import dataclass
7
+ from io import BytesIO
8
+ from pathlib import Path
9
+ from typing import Dict, Any, Optional, Callable, List, Union, BinaryIO
10
+ import json
11
+ import logging
12
+ import httpx
13
+
14
+ from ..api.tables.get_upload_url_v1_graphs_graph_id_tables_table_name_files_post import (
15
+ sync_detailed as get_upload_url,
16
+ )
17
+ from ..api.tables.update_file_v1_graphs_graph_id_tables_files_file_id_patch import (
18
+ sync_detailed as update_file,
19
+ )
20
+ from ..api.tables.list_tables_v1_graphs_graph_id_tables_get import (
21
+ sync_detailed as list_tables,
22
+ )
23
+ from ..api.tables.ingest_tables_v1_graphs_graph_id_tables_ingest_post import (
24
+ sync_detailed as ingest_tables,
25
+ )
26
+ from ..models.file_upload_request import FileUploadRequest
27
+ from ..models.file_update_request import FileUpdateRequest
28
+ from ..models.bulk_ingest_request import BulkIngestRequest
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
+ @dataclass
34
+ class UploadOptions:
35
+ """Options for file upload operations"""
36
+
37
+ on_progress: Optional[Callable[[str], None]] = None
38
+ fix_localstack_url: bool = True # Auto-fix LocalStack URLs for localhost
39
+ file_name: Optional[str] = None # Override file name (useful for buffer uploads)
40
+
41
+
42
+ @dataclass
43
+ class IngestOptions:
44
+ """Options for table ingestion operations"""
45
+
46
+ ignore_errors: bool = True
47
+ rebuild: bool = False
48
+ on_progress: Optional[Callable[[str], None]] = None
49
+
50
+
51
+ @dataclass
52
+ class UploadResult:
53
+ """Result from file upload operation"""
54
+
55
+ file_id: str
56
+ file_size: int
57
+ row_count: int
58
+ table_name: str
59
+ file_name: str
60
+ success: bool = True
61
+ error: Optional[str] = None
62
+
63
+
64
+ @dataclass
65
+ class TableInfo:
66
+ """Information about a staging table"""
67
+
68
+ table_name: str
69
+ row_count: int
70
+ file_count: int
71
+ total_size_bytes: int
72
+
73
+
74
+ class TableIngestClient:
75
+ """Enhanced table ingest client with simplified upload workflow"""
76
+
77
+ def __init__(self, config: Dict[str, Any]):
78
+ self.config = config
79
+ self.base_url = config["base_url"]
80
+ self.headers = config.get("headers", {})
81
+ self.token = config.get("token")
82
+ # Create httpx client for S3 uploads
83
+ self._http_client = httpx.Client(timeout=120.0)
84
+
85
+ def upload_parquet_file(
86
+ self,
87
+ graph_id: str,
88
+ table_name: str,
89
+ file_or_buffer: Union[Path, str, BytesIO, BinaryIO],
90
+ options: Optional[UploadOptions] = None,
91
+ ) -> UploadResult:
92
+ """
93
+ Upload a Parquet file to a staging table.
94
+
95
+ This method handles the complete 3-step upload process:
96
+ 1. Get presigned upload URL
97
+ 2. Upload file to S3
98
+ 3. Update file metadata
99
+
100
+ Args:
101
+ graph_id: The graph ID
102
+ table_name: Name of the staging table
103
+ file_or_buffer: Path to the Parquet file or BytesIO/BinaryIO buffer
104
+ options: Upload options
105
+
106
+ Returns:
107
+ UploadResult with upload details
108
+ """
109
+ if options is None:
110
+ options = UploadOptions()
111
+
112
+ # Auto-detect if this is a file path or buffer
113
+ is_buffer = isinstance(file_or_buffer, (BytesIO, BinaryIO)) or hasattr(
114
+ file_or_buffer, "read"
115
+ )
116
+
117
+ if is_buffer:
118
+ # Handle buffer upload
119
+ file_name = options.file_name or "data.parquet"
120
+ else:
121
+ # Handle file path upload
122
+ file_path = Path(file_or_buffer)
123
+ file_name = file_path.name
124
+ if not file_path.exists():
125
+ return UploadResult(
126
+ file_id="",
127
+ file_size=0,
128
+ row_count=0,
129
+ table_name=table_name,
130
+ file_name=file_name,
131
+ success=False,
132
+ error=f"File not found: {file_path}",
133
+ )
134
+
135
+ try:
136
+ # Import client here to avoid circular imports
137
+ from ..client import AuthenticatedClient
138
+
139
+ # Create authenticated client with X-API-Key
140
+ # The token is extracted from X-API-Key header in extensions.py
141
+ if not self.token:
142
+ return UploadResult(
143
+ file_id="",
144
+ file_size=0,
145
+ row_count=0,
146
+ table_name=table_name,
147
+ file_name=file_name,
148
+ success=False,
149
+ error="No API key provided. Set X-API-Key in headers.",
150
+ )
151
+
152
+ client = AuthenticatedClient(
153
+ base_url=self.base_url,
154
+ token=self.token,
155
+ prefix="", # No prefix for X-API-Key
156
+ auth_header_name="X-API-Key", # Use X-API-Key header instead of Authorization
157
+ headers=self.headers,
158
+ )
159
+
160
+ # Step 1: Get presigned upload URL
161
+ if options.on_progress:
162
+ options.on_progress(
163
+ f"Getting upload URL for {file_name} -> table '{table_name}'..."
164
+ )
165
+
166
+ upload_request = FileUploadRequest(
167
+ file_name=file_name, content_type="application/x-parquet"
168
+ )
169
+
170
+ kwargs = {
171
+ "graph_id": graph_id,
172
+ "table_name": table_name,
173
+ "client": client,
174
+ "body": upload_request,
175
+ }
176
+
177
+ response = get_upload_url(**kwargs)
178
+
179
+ if not response.parsed:
180
+ error_msg = f"Failed to get upload URL (status: {response.status_code})"
181
+ if hasattr(response, "content"):
182
+ try:
183
+ error_detail = json.loads(response.content)
184
+ error_msg = f"{error_msg}: {error_detail}"
185
+ except (json.JSONDecodeError, ValueError):
186
+ error_msg = f"{error_msg}: {response.content[:200]}"
187
+
188
+ return UploadResult(
189
+ file_id="",
190
+ file_size=0,
191
+ row_count=0,
192
+ table_name=table_name,
193
+ file_name=file_name,
194
+ success=False,
195
+ error=error_msg,
196
+ )
197
+
198
+ upload_url = response.parsed.upload_url
199
+ file_id = response.parsed.file_id
200
+
201
+ # Fix LocalStack URL if needed
202
+ if options.fix_localstack_url and "localstack:4566" in upload_url:
203
+ upload_url = upload_url.replace("localstack:4566", "localhost:4566")
204
+ logger.debug("Fixed LocalStack URL for localhost access")
205
+
206
+ # Step 2: Upload file to S3
207
+ if options.on_progress:
208
+ options.on_progress(f"Uploading {file_name} to S3...")
209
+
210
+ # Read file content - handle both paths and buffers
211
+ if is_buffer:
212
+ # Read from buffer
213
+ if hasattr(file_or_buffer, "getvalue"):
214
+ file_content = file_or_buffer.getvalue()
215
+ else:
216
+ # BinaryIO or file-like object
217
+ file_or_buffer.seek(0)
218
+ file_content = file_or_buffer.read()
219
+ file_size = len(file_content)
220
+ else:
221
+ # Read from file path
222
+ with open(file_path, "rb") as f:
223
+ file_content = f.read()
224
+ file_size = len(file_content)
225
+
226
+ s3_response = self._http_client.put(
227
+ upload_url,
228
+ content=file_content,
229
+ headers={"Content-Type": "application/x-parquet"},
230
+ )
231
+ s3_response.raise_for_status()
232
+
233
+ # Step 3: Get row count and update file metadata
234
+ if options.on_progress:
235
+ options.on_progress(f"Updating file metadata for {file_name}...")
236
+
237
+ try:
238
+ import pyarrow.parquet as pq
239
+
240
+ if is_buffer:
241
+ # Read from buffer for row count
242
+ if hasattr(file_or_buffer, "seek"):
243
+ file_or_buffer.seek(0)
244
+ parquet_table = pq.read_table(file_or_buffer)
245
+ else:
246
+ # Read from file path
247
+ parquet_table = pq.read_table(file_path)
248
+
249
+ row_count = parquet_table.num_rows
250
+ except ImportError:
251
+ logger.warning(
252
+ "pyarrow not installed, row count will be estimated from file size"
253
+ )
254
+ # Rough estimate: ~100 bytes per row for typical data
255
+ row_count = file_size // 100
256
+
257
+ metadata_update = FileUpdateRequest(
258
+ file_size_bytes=file_size, row_count=row_count
259
+ )
260
+
261
+ kwargs = {
262
+ "graph_id": graph_id,
263
+ "file_id": file_id,
264
+ "client": client,
265
+ "body": metadata_update,
266
+ }
267
+
268
+ update_response = update_file(**kwargs)
269
+
270
+ if not update_response.parsed:
271
+ return UploadResult(
272
+ file_id=file_id,
273
+ file_size=file_size,
274
+ row_count=row_count,
275
+ table_name=table_name,
276
+ file_name=file_name,
277
+ success=False,
278
+ error="Failed to update file metadata",
279
+ )
280
+
281
+ if options.on_progress:
282
+ options.on_progress(
283
+ f"✅ Uploaded {file_name} ({file_size:,} bytes, {row_count:,} rows)"
284
+ )
285
+
286
+ return UploadResult(
287
+ file_id=file_id,
288
+ file_size=file_size,
289
+ row_count=row_count,
290
+ table_name=table_name,
291
+ file_name=file_name,
292
+ success=True,
293
+ )
294
+
295
+ except Exception as e:
296
+ logger.error(f"Upload failed for {file_name}: {e}")
297
+ return UploadResult(
298
+ file_id="",
299
+ file_size=0,
300
+ row_count=0,
301
+ table_name=table_name,
302
+ file_name=file_name,
303
+ success=False,
304
+ error=str(e),
305
+ )
306
+
307
+ def list_staging_tables(self, graph_id: str) -> List[TableInfo]:
308
+ """
309
+ List all staging tables in a graph.
310
+
311
+ Args:
312
+ graph_id: The graph ID
313
+
314
+ Returns:
315
+ List of TableInfo objects
316
+ """
317
+ try:
318
+ from ..client import AuthenticatedClient
319
+
320
+ if not self.token:
321
+ logger.error("No API key provided")
322
+ return []
323
+
324
+ client = AuthenticatedClient(
325
+ base_url=self.base_url,
326
+ token=self.token,
327
+ prefix="",
328
+ auth_header_name="X-API-Key",
329
+ headers=self.headers,
330
+ )
331
+
332
+ kwargs = {"graph_id": graph_id, "client": client}
333
+
334
+ response = list_tables(**kwargs)
335
+
336
+ if not response.parsed:
337
+ logger.error("Failed to list tables")
338
+ return []
339
+
340
+ tables = []
341
+ for table_data in response.parsed.tables:
342
+ tables.append(
343
+ TableInfo(
344
+ table_name=table_data.table_name,
345
+ row_count=table_data.row_count,
346
+ file_count=table_data.file_count,
347
+ total_size_bytes=table_data.total_size_bytes,
348
+ )
349
+ )
350
+
351
+ return tables
352
+
353
+ except Exception as e:
354
+ logger.error(f"Failed to list tables: {e}")
355
+ return []
356
+
357
+ def ingest_all_tables(
358
+ self, graph_id: str, options: Optional[IngestOptions] = None
359
+ ) -> Dict[str, Any]:
360
+ """
361
+ Ingest all staging tables into the graph.
362
+
363
+ Args:
364
+ graph_id: The graph ID
365
+ options: Ingest options
366
+
367
+ Returns:
368
+ Dictionary with ingestion results
369
+ """
370
+ if options is None:
371
+ options = IngestOptions()
372
+
373
+ try:
374
+ from ..client import AuthenticatedClient
375
+
376
+ if not self.token:
377
+ return {"success": False, "error": "No API key provided"}
378
+
379
+ client = AuthenticatedClient(
380
+ base_url=self.base_url,
381
+ token=self.token,
382
+ prefix="",
383
+ auth_header_name="X-API-Key",
384
+ headers=self.headers,
385
+ )
386
+
387
+ if options.on_progress:
388
+ options.on_progress("Starting table ingestion...")
389
+
390
+ ingest_request = BulkIngestRequest(
391
+ ignore_errors=options.ignore_errors, rebuild=options.rebuild
392
+ )
393
+
394
+ kwargs = {
395
+ "graph_id": graph_id,
396
+ "client": client,
397
+ "body": ingest_request,
398
+ }
399
+
400
+ response = ingest_tables(**kwargs)
401
+
402
+ if not response.parsed:
403
+ return {"success": False, "error": "Failed to ingest tables"}
404
+
405
+ result = {
406
+ "success": True,
407
+ "operation_id": getattr(response.parsed, "operation_id", None),
408
+ "message": getattr(response.parsed, "message", "Ingestion started"),
409
+ }
410
+
411
+ if options.on_progress:
412
+ options.on_progress("✅ Table ingestion completed")
413
+
414
+ return result
415
+
416
+ except Exception as e:
417
+ logger.error(f"Failed to ingest tables: {e}")
418
+ return {"success": False, "error": str(e)}
419
+
420
+ def upload_and_ingest(
421
+ self,
422
+ graph_id: str,
423
+ table_name: str,
424
+ file_path: Path,
425
+ upload_options: Optional[UploadOptions] = None,
426
+ ingest_options: Optional[IngestOptions] = None,
427
+ ) -> Dict[str, Any]:
428
+ """
429
+ Convenience method to upload a file and immediately ingest it.
430
+
431
+ Args:
432
+ graph_id: The graph ID
433
+ table_name: Name of the staging table
434
+ file_path: Path to the Parquet file
435
+ upload_options: Upload options
436
+ ingest_options: Ingest options
437
+
438
+ Returns:
439
+ Dictionary with upload and ingest results
440
+ """
441
+ # Upload the file
442
+ upload_result = self.upload_parquet_file(
443
+ graph_id, table_name, file_path, upload_options
444
+ )
445
+
446
+ if not upload_result.success:
447
+ return {
448
+ "success": False,
449
+ "upload": upload_result,
450
+ "ingest": None,
451
+ "error": upload_result.error,
452
+ }
453
+
454
+ # Ingest the table
455
+ ingest_result = self.ingest_all_tables(graph_id, ingest_options)
456
+
457
+ return {
458
+ "success": upload_result.success and ingest_result.get("success", False),
459
+ "upload": upload_result,
460
+ "ingest": ingest_result,
461
+ }
462
+
463
+ def close(self):
464
+ """Close HTTP client connections"""
465
+ if self._http_client:
466
+ self._http_client.close()