robosystems-client 0.1.17__py3-none-any.whl → 0.1.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of robosystems-client might be problematic. Click here for more details.
- robosystems_client/__init__.py +15 -4
- robosystems_client/api/agent/auto_select_agent.py +25 -0
- robosystems_client/api/agent/batch_process_queries.py +25 -0
- robosystems_client/api/agent/execute_specific_agent.py +25 -0
- robosystems_client/api/agent/get_agent_metadata.py +25 -0
- robosystems_client/api/agent/list_agents.py +20 -0
- robosystems_client/api/agent/recommend_agent.py +25 -0
- robosystems_client/api/backup/create_backup.py +25 -0
- robosystems_client/api/backup/export_backup.py +25 -0
- robosystems_client/api/backup/get_backup_download_url.py +20 -0
- robosystems_client/api/backup/get_backup_stats.py +25 -0
- robosystems_client/api/backup/list_backups.py +20 -0
- robosystems_client/api/backup/restore_backup.py +25 -0
- robosystems_client/api/connections/create_connection.py +25 -0
- robosystems_client/api/connections/create_link_token.py +25 -0
- robosystems_client/api/connections/delete_connection.py +25 -0
- robosystems_client/api/connections/exchange_link_token.py +25 -0
- robosystems_client/api/connections/get_connection.py +25 -0
- robosystems_client/api/connections/get_connection_options.py +25 -0
- robosystems_client/api/connections/init_o_auth.py +25 -0
- robosystems_client/api/connections/list_connections.py +20 -0
- robosystems_client/api/connections/oauth_callback.py +25 -0
- robosystems_client/api/connections/sync_connection.py +25 -0
- robosystems_client/api/copy/copy_data_to_graph.py +25 -0
- robosystems_client/api/create/create_graph.py +25 -0
- robosystems_client/api/graph_analytics/get_graph_metrics.py +25 -0
- robosystems_client/api/graph_analytics/get_graph_usage_stats.py +20 -0
- robosystems_client/api/graph_billing/get_current_graph_bill.py +25 -0
- robosystems_client/api/graph_billing/get_graph_billing_history.py +20 -0
- robosystems_client/api/graph_billing/get_graph_monthly_bill.py +25 -0
- robosystems_client/api/graph_billing/get_graph_usage_details.py +20 -0
- robosystems_client/api/graph_credits/check_credit_balance.py +20 -0
- robosystems_client/api/graph_credits/check_storage_limits.py +25 -0
- robosystems_client/api/graph_credits/get_credit_summary.py +25 -0
- robosystems_client/api/graph_credits/get_storage_usage.py +20 -0
- robosystems_client/api/graph_credits/list_credit_transactions.py +20 -0
- robosystems_client/api/graph_health/get_database_health.py +25 -0
- robosystems_client/api/graph_info/get_database_info.py +25 -0
- robosystems_client/api/graph_limits/get_graph_limits.py +25 -0
- robosystems_client/api/mcp/call_mcp_tool.py +20 -0
- robosystems_client/api/mcp/list_mcp_tools.py +25 -0
- robosystems_client/api/operations/cancel_operation.py +25 -0
- robosystems_client/api/operations/get_operation_status.py +25 -0
- robosystems_client/api/operations/stream_operation_events.py +20 -0
- robosystems_client/api/query/execute_cypher_query.py +20 -0
- robosystems_client/api/schema/export_graph_schema.py +20 -0
- robosystems_client/api/schema/get_graph_schema_info.py +25 -0
- robosystems_client/api/schema/list_schema_extensions.py +25 -0
- robosystems_client/api/schema/validate_schema.py +25 -0
- robosystems_client/api/subgraphs/create_subgraph.py +25 -0
- robosystems_client/api/subgraphs/delete_subgraph.py +25 -0
- robosystems_client/api/subgraphs/get_subgraph_info.py +25 -0
- robosystems_client/api/subgraphs/get_subgraph_quota.py +25 -0
- robosystems_client/api/subgraphs/list_subgraphs.py +25 -0
- robosystems_client/api/user/create_user_api_key.py +25 -0
- robosystems_client/api/user/get_all_credit_summaries.py +25 -0
- robosystems_client/api/user/get_current_user.py +25 -0
- robosystems_client/api/user/get_user_graphs.py +25 -0
- robosystems_client/api/user/list_user_api_keys.py +25 -0
- robosystems_client/api/user/revoke_user_api_key.py +25 -0
- robosystems_client/api/user/select_user_graph.py +25 -0
- robosystems_client/api/user/update_user.py +25 -0
- robosystems_client/api/user/update_user_api_key.py +25 -0
- robosystems_client/api/user/update_user_password.py +25 -0
- robosystems_client/api/user_analytics/get_detailed_user_analytics.py +20 -0
- robosystems_client/api/user_analytics/get_user_usage_overview.py +25 -0
- robosystems_client/api/user_limits/get_all_shared_repository_limits.py +25 -0
- robosystems_client/api/user_limits/get_shared_repository_limits.py +25 -0
- robosystems_client/api/user_limits/get_user_limits.py +25 -0
- robosystems_client/api/user_limits/get_user_usage.py +25 -0
- robosystems_client/api/user_subscriptions/cancel_shared_repository_subscription.py +25 -0
- robosystems_client/api/user_subscriptions/get_repository_credits.py +25 -0
- robosystems_client/api/user_subscriptions/get_shared_repository_credits.py +25 -0
- robosystems_client/api/user_subscriptions/get_user_shared_subscriptions.py +20 -0
- robosystems_client/api/user_subscriptions/subscribe_to_shared_repository.py +25 -0
- robosystems_client/api/user_subscriptions/upgrade_shared_repository_subscription.py +25 -0
- robosystems_client/extensions/__init__.py +70 -0
- robosystems_client/extensions/auth_integration.py +14 -1
- robosystems_client/extensions/copy_client.py +32 -22
- robosystems_client/extensions/dataframe_utils.py +455 -0
- robosystems_client/extensions/extensions.py +16 -0
- robosystems_client/extensions/operation_client.py +43 -21
- robosystems_client/extensions/query_client.py +109 -12
- robosystems_client/extensions/tests/test_dataframe_utils.py +334 -0
- robosystems_client/extensions/tests/test_integration.py +1 -1
- robosystems_client/extensions/tests/test_token_utils.py +274 -0
- robosystems_client/extensions/token_utils.py +417 -0
- robosystems_client/extensions/utils.py +32 -2
- {robosystems_client-0.1.17.dist-info → robosystems_client-0.1.18.dist-info}/METADATA +1 -1
- {robosystems_client-0.1.17.dist-info → robosystems_client-0.1.18.dist-info}/RECORD +92 -88
- {robosystems_client-0.1.17.dist-info → robosystems_client-0.1.18.dist-info}/WHEEL +0 -0
- {robosystems_client-0.1.17.dist-info → robosystems_client-0.1.18.dist-info}/licenses/LICENSE +0 -0
|
@@ -54,6 +54,47 @@ from .auth_integration import (
|
|
|
54
54
|
create_development_extensions,
|
|
55
55
|
)
|
|
56
56
|
|
|
57
|
+
# JWT Token utilities
|
|
58
|
+
from .token_utils import (
|
|
59
|
+
validate_jwt_format,
|
|
60
|
+
extract_jwt_from_header,
|
|
61
|
+
decode_jwt_payload,
|
|
62
|
+
is_jwt_expired,
|
|
63
|
+
get_jwt_claims,
|
|
64
|
+
get_jwt_expiration,
|
|
65
|
+
extract_token_from_environment,
|
|
66
|
+
extract_token_from_cookie,
|
|
67
|
+
find_valid_token,
|
|
68
|
+
TokenManager,
|
|
69
|
+
TokenSource,
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
# DataFrame utilities (optional - requires pandas)
|
|
73
|
+
try:
|
|
74
|
+
from .dataframe_utils import (
|
|
75
|
+
query_result_to_dataframe,
|
|
76
|
+
DataFrameQueryClient,
|
|
77
|
+
HAS_PANDAS,
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
# Re-export the imported functions for module API
|
|
81
|
+
from .dataframe_utils import (
|
|
82
|
+
parse_datetime_columns,
|
|
83
|
+
stream_to_dataframe as _stream_to_dataframe,
|
|
84
|
+
dataframe_to_cypher_params,
|
|
85
|
+
export_query_to_csv,
|
|
86
|
+
compare_dataframes,
|
|
87
|
+
)
|
|
88
|
+
except ImportError:
|
|
89
|
+
HAS_PANDAS = False
|
|
90
|
+
DataFrameQueryClient = None
|
|
91
|
+
# Set placeholders for optional functions
|
|
92
|
+
parse_datetime_columns = None
|
|
93
|
+
_stream_to_dataframe = None
|
|
94
|
+
dataframe_to_cypher_params = None
|
|
95
|
+
export_query_to_csv = None
|
|
96
|
+
compare_dataframes = None
|
|
97
|
+
|
|
57
98
|
__all__ = [
|
|
58
99
|
# Core extension classes
|
|
59
100
|
"RoboSystemsExtensions",
|
|
@@ -101,6 +142,21 @@ __all__ = [
|
|
|
101
142
|
"create_extensions",
|
|
102
143
|
"create_production_extensions",
|
|
103
144
|
"create_development_extensions",
|
|
145
|
+
# JWT Token utilities
|
|
146
|
+
"validate_jwt_format",
|
|
147
|
+
"extract_jwt_from_header",
|
|
148
|
+
"decode_jwt_payload",
|
|
149
|
+
"is_jwt_expired",
|
|
150
|
+
"get_jwt_claims",
|
|
151
|
+
"get_jwt_expiration",
|
|
152
|
+
"extract_token_from_environment",
|
|
153
|
+
"extract_token_from_cookie",
|
|
154
|
+
"find_valid_token",
|
|
155
|
+
"TokenManager",
|
|
156
|
+
"TokenSource",
|
|
157
|
+
# DataFrame utilities (optional)
|
|
158
|
+
"HAS_PANDAS",
|
|
159
|
+
"DataFrameQueryClient",
|
|
104
160
|
]
|
|
105
161
|
|
|
106
162
|
# Create a default extensions instance
|
|
@@ -135,3 +191,17 @@ def copy_from_s3(
|
|
|
135
191
|
return extensions.copy_from_s3(
|
|
136
192
|
graph_id, table_name, s3_path, access_key_id, secret_access_key, **kwargs
|
|
137
193
|
)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
# DataFrame convenience functions (if pandas is available)
|
|
197
|
+
if HAS_PANDAS:

  def query_to_dataframe(graph_id: str, query: str, parameters=None, **kwargs):
    """Run a query via the module-level ``execute_query`` and return a DataFrame.

    Args:
      graph_id: Graph to query.
      query: Query text handed to ``execute_query``.
      parameters: Optional query parameters.
      **kwargs: Forwarded unchanged to ``query_result_to_dataframe``.
    """
    return query_result_to_dataframe(
      execute_query(graph_id, query, parameters), **kwargs
    )

  def stream_to_dataframe(graph_id: str, query: str, parameters=None, chunk_size=10000):
    """Stream a query via the module-level ``stream_query`` into one DataFrame.

    The same ``chunk_size`` is used both for the streaming call and for the
    chunked DataFrame assembly done by the imported ``_stream_to_dataframe``.
    """
    record_stream = stream_query(graph_id, query, parameters, chunk_size)
    return _stream_to_dataframe(record_stream, chunk_size)
|
|
@@ -36,6 +36,9 @@ class AuthenticatedExtensions(RoboSystemsExtensions):
|
|
|
36
36
|
config.headers["X-API-Key"] = api_key
|
|
37
37
|
config.headers["Authorization"] = f"Bearer {api_key}"
|
|
38
38
|
|
|
39
|
+
# Store the token for later use by child clients
|
|
40
|
+
self._token = api_key
|
|
41
|
+
|
|
39
42
|
super().__init__(config)
|
|
40
43
|
|
|
41
44
|
# Store authenticated client for SDK operations
|
|
@@ -57,8 +60,12 @@ class AuthenticatedExtensions(RoboSystemsExtensions):
|
|
|
57
60
|
|
|
58
61
|
request = CypherQueryRequest(query=query, parameters=parameters or {})
|
|
59
62
|
|
|
63
|
+
# Pass the token parameter along with the client
|
|
60
64
|
response = sync_detailed(
|
|
61
|
-
graph_id=graph_id,
|
|
65
|
+
graph_id=graph_id,
|
|
66
|
+
client=self._authenticated_client,
|
|
67
|
+
body=request,
|
|
68
|
+
token=self._authenticated_client.token,
|
|
62
69
|
)
|
|
63
70
|
|
|
64
71
|
if response.parsed:
|
|
@@ -96,6 +103,9 @@ class CookieAuthExtensions(RoboSystemsExtensions):
|
|
|
96
103
|
elif not config.base_url:
|
|
97
104
|
config.base_url = "https://api.robosystems.ai"
|
|
98
105
|
|
|
106
|
+
# Extract token from cookies if present
|
|
107
|
+
self._token = cookies.get("auth-token")
|
|
108
|
+
|
|
99
109
|
super().__init__(config)
|
|
100
110
|
|
|
101
111
|
# Store cookies for requests
|
|
@@ -138,6 +148,9 @@ class TokenExtensions(RoboSystemsExtensions):
|
|
|
138
148
|
config.headers = {}
|
|
139
149
|
config.headers["Authorization"] = f"Bearer {token}"
|
|
140
150
|
|
|
151
|
+
# Store the token for later use by child clients
|
|
152
|
+
self._token = token
|
|
153
|
+
|
|
141
154
|
super().__init__(config)
|
|
142
155
|
|
|
143
156
|
# Store authenticated client
|
|
@@ -74,12 +74,11 @@ class CopyClient:
|
|
|
74
74
|
def __init__(self, config: Dict[str, Any]):
|
|
75
75
|
self.config = config
|
|
76
76
|
self.base_url = config["base_url"]
|
|
77
|
+
self.headers = config.get("headers", {})
|
|
78
|
+
# Get token from config if passed by parent
|
|
79
|
+
self.token = config.get("token")
|
|
77
80
|
self.sse_client: Optional[SSEClient] = None
|
|
78
81
|
|
|
79
|
-
# Get client authentication if provided
|
|
80
|
-
self.auth_token = config.get("auth_token")
|
|
81
|
-
self.api_key = config.get("api_key")
|
|
82
|
-
|
|
83
82
|
def copy_from_s3(
|
|
84
83
|
self, graph_id: str, request: S3CopyRequest, options: Optional[CopyOptions] = None
|
|
85
84
|
) -> CopyResult:
|
|
@@ -115,18 +114,18 @@ class CopyClient:
|
|
|
115
114
|
start_time = time.time()
|
|
116
115
|
|
|
117
116
|
# Import client here to avoid circular imports
|
|
118
|
-
from ..client import
|
|
117
|
+
from ..client import Client
|
|
119
118
|
|
|
120
|
-
# Create
|
|
121
|
-
client =
|
|
122
|
-
base_url=self.base_url,
|
|
123
|
-
token=self.auth_token,
|
|
124
|
-
headers={"X-API-Key": self.api_key} if self.api_key else None,
|
|
125
|
-
)
|
|
119
|
+
# Create client with headers
|
|
120
|
+
client = Client(base_url=self.base_url, headers=self.headers)
|
|
126
121
|
|
|
127
122
|
try:
|
|
128
|
-
# Execute the copy request
|
|
129
|
-
|
|
123
|
+
# Execute the copy request with token if available
|
|
124
|
+
kwargs = {"graph_id": graph_id, "client": client, "body": request}
|
|
125
|
+
# Only add token if it's a valid string
|
|
126
|
+
if self.token and isinstance(self.token, str) and self.token.strip():
|
|
127
|
+
kwargs["token"] = self.token
|
|
128
|
+
response = copy_data_to_graph(**kwargs)
|
|
130
129
|
|
|
131
130
|
if response.parsed:
|
|
132
131
|
response_data: CopyResponse = response.parsed
|
|
@@ -164,11 +163,24 @@ class CopyClient:
|
|
|
164
163
|
)
|
|
165
164
|
|
|
166
165
|
except Exception as e:
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
)
|
|
166
|
+
error_msg = str(e)
|
|
167
|
+
# Check for authentication errors
|
|
168
|
+
if (
|
|
169
|
+
"401" in error_msg or "403" in error_msg or "unauthorized" in error_msg.lower()
|
|
170
|
+
):
|
|
171
|
+
logger.error(f"Authentication failed during copy operation: {e}")
|
|
172
|
+
return CopyResult(
|
|
173
|
+
status="failed",
|
|
174
|
+
error=f"Authentication failed: {error_msg}",
|
|
175
|
+
execution_time_ms=(time.time() - start_time) * 1000,
|
|
176
|
+
)
|
|
177
|
+
else:
|
|
178
|
+
logger.error(f"Copy operation failed: {e}")
|
|
179
|
+
return CopyResult(
|
|
180
|
+
status="failed",
|
|
181
|
+
error=error_msg,
|
|
182
|
+
execution_time_ms=(time.time() - start_time) * 1000,
|
|
183
|
+
)
|
|
172
184
|
|
|
173
185
|
def _monitor_copy_operation(
|
|
174
186
|
self, operation_id: str, options: CopyOptions, start_time: float
|
|
@@ -449,10 +461,8 @@ class AsyncCopyClient:
|
|
|
449
461
|
self.config = config
|
|
450
462
|
self.base_url = config["base_url"]
|
|
451
463
|
self.sse_client: Optional[AsyncSSEClient] = None
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
self.auth_token = config.get("auth_token")
|
|
455
|
-
self.api_key = config.get("api_key")
|
|
464
|
+
# Get token from config if passed by parent
|
|
465
|
+
self.token = config.get("token")
|
|
456
466
|
|
|
457
467
|
async def copy_from_s3(
|
|
458
468
|
self, graph_id: str, request: S3CopyRequest, options: Optional[CopyOptions] = None
|
|
@@ -0,0 +1,455 @@
|
|
|
1
|
+
"""Pandas DataFrame integration utilities for RoboSystems SDK
|
|
2
|
+
|
|
3
|
+
Provides seamless integration between query results and Pandas DataFrames.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Dict, Any, Optional, List, Union, TYPE_CHECKING
|
|
7
|
+
import logging
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from .query_client import QueryResult
|
|
11
|
+
|
|
12
|
+
# Make pandas optional to avoid forcing dependency
|
|
13
|
+
try:
|
|
14
|
+
import pandas as pd
|
|
15
|
+
|
|
16
|
+
HAS_PANDAS = True
|
|
17
|
+
except ImportError:
|
|
18
|
+
HAS_PANDAS = False
|
|
19
|
+
pd = None
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def require_pandas():
  """Guard for DataFrame helpers: raise if pandas could not be imported.

  Raises:
    ImportError: When the module-level ``HAS_PANDAS`` flag is false; the
      message includes the pip command to install pandas.
  """
  if HAS_PANDAS:
    return
  raise ImportError(
    "Pandas is required for DataFrame features. Install it with: pip install pandas"
  )
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def query_result_to_dataframe(
  result: Union[Dict[str, Any], "QueryResult"],
  normalize_nested: bool = True,
  parse_dates: bool = True,
) -> "pd.DataFrame":
  """Build a pandas DataFrame from a query result.

  Args:
    result: Either an object exposing ``data`` and ``columns`` attributes
      or a dict with ``"data"`` and ``"columns"`` keys.
    normalize_nested: When true and the first row is a dict, flatten the
      rows with ``pd.json_normalize`` (the ``columns`` list is not used
      in that case).
    parse_dates: When true and the frame is non-empty, run
      ``parse_datetime_columns`` on the result.

  Returns:
    DataFrame holding the query rows; an empty frame carrying the column
    names when there is no data.

  Raises:
    ValueError: If ``result`` matches neither supported shape.
    ImportError: If pandas is not installed.

  Example:
    >>> result = query_client.query(graph_id, "MATCH (c:Company) RETURN c")
    >>> df = query_result_to_dataframe(result)
    >>> print(df.head())
  """
  require_pandas()

  # Pull rows and column names out of whichever result shape we were given.
  if hasattr(result, "data") and hasattr(result, "columns"):
    rows, column_names = result.data, result.columns
  elif isinstance(result, dict):
    rows, column_names = result.get("data", []), result.get("columns", [])
  else:
    raise ValueError("Invalid result format")

  if not rows:
    # No rows: keep the column names so the frame still has a schema.
    frame = pd.DataFrame(columns=column_names if column_names else [])
  elif normalize_nested and isinstance(rows[0], dict):
    frame = pd.json_normalize(rows)
  else:
    frame = pd.DataFrame(rows, columns=column_names if column_names else None)

  if not parse_dates or frame.empty:
    return frame
  return parse_datetime_columns(frame)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def parse_datetime_columns(
  df: "pd.DataFrame", date_columns: Optional[List[str]] = None, infer: bool = True
) -> "pd.DataFrame":
  """Parse datetime columns in a DataFrame (the frame is modified in place).

  Args:
    df: Input DataFrame; converted columns are assigned back onto it.
    date_columns: Specific columns to parse as dates. Names missing from
      ``df`` are skipped. Values that cannot be parsed become ``NaT``
      because the caller asked for these columns explicitly.
    infer: When ``date_columns`` is not given, try to detect date-like
      ``object`` columns automatically.

  Returns:
    The same DataFrame object with parsed datetime columns.

  Raises:
    ImportError: If pandas is not installed.

  Example:
    >>> df = parse_datetime_columns(df, date_columns=['created_at', 'updated_at'])
  """
  require_pandas()

  if date_columns:
    for col in date_columns:
      if col in df.columns:
        df[col] = pd.to_datetime(df[col], errors="coerce")

  elif infer:
    for col in df.columns:
      if df[col].dtype != "object":
        continue
      # Columns with no non-null values carry nothing to infer from.
      if not df[col].notna().any():
        continue
      try:
        # Parse the WHOLE column strictly. Validating only a small sample
        # and then coercing the rest would silently turn every later
        # non-date value into NaT, destroying data in mixed columns.
        parsed = pd.to_datetime(df[col], errors="raise")
      except (ValueError, TypeError):
        # Not (entirely) a date column: leave it untouched.
        continue
      df[col] = parsed

  return df
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def stream_to_dataframe(
  stream_iterator,
  chunk_size: int = 10000,
  columns: Optional[List[str]] = None,
  on_chunk: Optional[callable] = None,
) -> "pd.DataFrame":
  """Convert streaming query results to a DataFrame.

  Records are buffered into chunks of ``chunk_size``, each chunk is turned
  into a DataFrame, and all chunk frames are concatenated at the end.

  Args:
    stream_iterator: Iterator from stream_query yielding one record at a time.
    chunk_size: Number of records buffered before a chunk frame is built.
    columns: Column names (will be inferred by pandas if not provided).
    on_chunk: Optional callback invoked as ``on_chunk(chunk_df, total)``
      after each chunk, where ``total`` is the cumulative number of
      records consumed so far (including for the final partial chunk).

  Returns:
    Complete DataFrame from streamed results; an empty frame (with
    ``columns`` if given) when the stream yields nothing.

  Raises:
    ImportError: If pandas is not installed.

  Example:
    >>> stream = query_client.stream_query(graph_id, "MATCH (n) RETURN n")
    >>> df = stream_to_dataframe(stream, chunk_size=5000)
  """
  require_pandas()

  chunks = []
  current_chunk = []
  total_records = 0  # stays 0 when the stream is empty

  for total_records, record in enumerate(stream_iterator, start=1):
    current_chunk.append(record)

    if len(current_chunk) >= chunk_size:
      chunk_df = pd.DataFrame(current_chunk, columns=columns)
      chunks.append(chunk_df)

      if on_chunk:
        on_chunk(chunk_df, total_records)

      current_chunk = []

  # Flush the trailing partial chunk. The callback gets the running total,
  # matching the full-chunk calls above, rather than this chunk's length.
  if current_chunk:
    chunk_df = pd.DataFrame(current_chunk, columns=columns)
    chunks.append(chunk_df)

    if on_chunk:
      on_chunk(chunk_df, total_records)

  if chunks:
    return pd.concat(chunks, ignore_index=True)
  return pd.DataFrame(columns=columns if columns else [])
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def dataframe_to_cypher_params(
  df: "pd.DataFrame", param_name: str = "data"
) -> Dict[str, List[Dict[str, Any]]]:
  """Convert a DataFrame to Cypher query parameters.

  Each row becomes a dict; scalar missing values (NaN, NA, NaT, None) are
  replaced with ``None``. Non-scalar cell values such as lists are passed
  through unchanged.

  Args:
    df: DataFrame to convert.
    param_name: Parameter name for the query.

  Returns:
    Dict mapping ``param_name`` to the list of row dicts.

  Raises:
    ImportError: If pandas is not installed.

  Example:
    >>> df = pd.DataFrame({'name': ['Alice', 'Bob'], 'age': [30, 25]})
    >>> params = dataframe_to_cypher_params(df)
    >>> query = "UNWIND $data AS row CREATE (p:Person {name: row.name, age: row.age})"
    >>> result = query_client.query(graph_id, query, params)
  """
  require_pandas()

  is_scalar = pd.api.types.is_scalar
  records = df.to_dict("records")

  for record in records:
    for key, value in record.items():
      # pd.isna returns an array for list-like input, and using that in a
      # boolean context raises ValueError, so only test scalars. pd.isna
      # already covers float NaN; no separate numpy check is needed.
      if is_scalar(value) and pd.isna(value):
        record[key] = None

  return {param_name: records}
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def export_query_to_csv(
  query_client,
  graph_id: str,
  query: str,
  output_file: str,
  parameters: Optional[Dict[str, Any]] = None,
  chunk_size: int = 5000,
  **csv_kwargs,
) -> int:
  """Export query results directly to a CSV file.

  Records are streamed from ``query_client.stream_query`` and written in
  chunks of ``chunk_size``. The first chunk creates the file, later chunks
  are appended without a header row.

  Args:
    query_client: QueryClient instance (must provide ``stream_query``).
    graph_id: Graph ID to query.
    query: Cypher query.
    output_file: Output CSV file path.
    parameters: Query parameters.
    chunk_size: Records per chunk for streaming.
    **csv_kwargs: Additional arguments for ``DataFrame.to_csv``. ``index``
      defaults to ``False`` but may be overridden. On appended chunks
      ``mode`` and ``header`` are always forced to ``"a"`` / ``False``.

  Returns:
    Number of records exported. No file is written when the stream is empty.

  Raises:
    ImportError: If pandas is not installed.

  Example:
    >>> count = export_query_to_csv(
    ...     query_client, 'graph_id',
    ...     "MATCH (c:Company) RETURN c.name, c.revenue",
    ...     "companies.csv"
    ... )
    >>> print(f"Exported {count} records")
  """
  require_pandas()

  def _write_chunk(rows, append):
    """Write one chunk of records, creating or appending to the file."""
    # Merge caller options over the default instead of passing both as
    # keywords, so index/header/mode in csv_kwargs cannot raise
    # "got multiple values for keyword argument".
    options = {"index": False, **csv_kwargs}
    if append:
      options["mode"] = "a"
      options["header"] = False
    pd.DataFrame(rows).to_csv(output_file, **options)

  stream = query_client.stream_query(graph_id, query, parameters, chunk_size)

  total_count = 0
  file_started = False
  pending = []

  for record in stream:
    pending.append(record)

    if len(pending) >= chunk_size:
      _write_chunk(pending, append=file_started)
      file_started = True
      total_count += len(pending)
      pending = []

  # Write whatever is left after the last full chunk.
  if pending:
    _write_chunk(pending, append=file_started)
    total_count += len(pending)

  logger.info(f"Exported {total_count} records to {output_file}")
  return total_count
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def compare_dataframes(
  df1: "pd.DataFrame",
  df2: "pd.DataFrame",
  key_columns: Optional[List[str]] = None,
  compare_columns: Optional[List[str]] = None,
) -> "pd.DataFrame":
  """Compare two DataFrames and return differences.

  Args:
    df1: First ("old") DataFrame.
    df2: Second ("new") DataFrame.
    key_columns: Columns to use as keys. When given, the frames are
      outer-merged on them with ``_old`` / ``_new`` suffixes and a
      ``_merge`` indicator column.
    compare_columns: Columns for which a boolean ``<col>_changed`` column
      is added to the merged frame. Only used with ``key_columns``, and
      only for columns present in both frames.

  Returns:
    With ``key_columns``: the merged frame. Without: the rows that are not
    duplicated across the concatenation of both frames.

  Raises:
    ImportError: If pandas is not installed.

  Example:
    >>> old_data = query_to_dataframe(old_result)
    >>> new_data = query_to_dataframe(new_result)
    >>> diff = compare_dataframes(old_data, new_data, key_columns=['id'])
  """
  require_pandas()

  if not key_columns:
    # No keys: rows appearing exactly once across both frames are the diff.
    return pd.concat([df1, df2]).drop_duplicates(keep=False)

  merged = pd.merge(
    df1, df2, on=key_columns, how="outer", suffixes=("_old", "_new"), indicator=True
  )

  for col in compare_columns or []:
    col_old = f"{col}_old"
    col_new = f"{col}_new"
    if col_old in merged.columns and col_new in merged.columns:
      old_values = merged[col_old]
      new_values = merged[col_new]
      # NaN != NaN is True, so a plain inequality would flag rows where the
      # value is missing on both sides as changed. Exclude those rows.
      both_missing = old_values.isna() & new_values.isna()
      merged[f"{col}_changed"] = (old_values != new_values) & ~both_missing

  return merged
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
class DataFrameQueryClient:
  """Thin wrapper that returns QueryClient results as pandas DataFrames."""

  def __init__(self, query_client):
    """Wrap an existing QueryClient.

    Args:
      query_client: Existing QueryClient instance to delegate to.

    Raises:
      ImportError: If pandas is not installed.
    """
    require_pandas()
    self.query_client = query_client

  def query_df(
    self,
    graph_id: str,
    query: str,
    parameters: Optional[Dict[str, Any]] = None,
    normalize_nested: bool = True,
    parse_dates: bool = True,
  ) -> "pd.DataFrame":
    """Run a query and convert its result to a DataFrame.

    Args:
      graph_id: Graph ID to query.
      query: Cypher query.
      parameters: Query parameters.
      normalize_nested: Flatten nested dictionaries.
      parse_dates: Parse datetime columns.

    Returns:
      Query results as a pandas DataFrame.

    Example:
      >>> df_client = DataFrameQueryClient(query_client)
      >>> df = df_client.query_df('graph_id', "MATCH (c:Company) RETURN c")
      >>> print(df.describe())
    """
    raw_result = self.query_client.query(graph_id, query, parameters)
    return query_result_to_dataframe(
      raw_result, normalize_nested=normalize_nested, parse_dates=parse_dates
    )

  def stream_df(
    self,
    graph_id: str,
    query: str,
    parameters: Optional[Dict[str, Any]] = None,
    chunk_size: int = 10000,
  ) -> "pd.DataFrame":
    """Stream a query and assemble the records into one DataFrame.

    Args:
      graph_id: Graph ID to query.
      query: Cypher query.
      parameters: Query parameters.
      chunk_size: Records per chunk, used for both the stream request and
        the DataFrame assembly.

    Returns:
      Complete DataFrame from streamed results.

    Example:
      >>> df = df_client.stream_df(
      ...     'graph_id',
      ...     "MATCH (n) RETURN n",
      ...     chunk_size=5000
      ... )
    """
    return stream_to_dataframe(
      self.query_client.stream_query(graph_id, query, parameters, chunk_size),
      chunk_size,
    )

  def query_batch_df(
    self,
    graph_id: str,
    queries: List[str],
    parameters_list: Optional[List[Dict[str, Any]]] = None,
  ) -> List["pd.DataFrame"]:
    """Run several queries and return one DataFrame per query.

    A result that is a dict containing an ``"error"`` key is wrapped as a
    single-row DataFrame instead of being converted as query data.

    Args:
      graph_id: Graph ID to query.
      queries: List of Cypher queries.
      parameters_list: List of parameter dicts.

    Returns:
      List of DataFrames, one per query.

    Example:
      >>> dfs = df_client.query_batch_df('graph_id', [
      ...     "MATCH (p:Person) RETURN p",
      ...     "MATCH (c:Company) RETURN c"
      ... ])
    """
    outcomes = self.query_client.query_batch(graph_id, queries, parameters_list)
    return [
      pd.DataFrame([outcome])
      if isinstance(outcome, dict) and "error" in outcome
      else query_result_to_dataframe(outcome)
      for outcome in outcomes
    ]

  def export_to_csv(
    self,
    graph_id: str,
    query: str,
    output_file: str,
    parameters: Optional[Dict[str, Any]] = None,
    **csv_kwargs,
  ) -> int:
    """Export query results to a CSV file via ``export_query_to_csv``.

    Args:
      graph_id: Graph ID to query.
      query: Cypher query.
      output_file: Output CSV file path.
      parameters: Query parameters.
      **csv_kwargs: Forwarded as keyword arguments to ``export_query_to_csv``.

    Returns:
      Number of records exported.
    """
    exported = export_query_to_csv(
      self.query_client, graph_id, query, output_file, parameters, **csv_kwargs
    )
    return exported
|
|
@@ -39,6 +39,22 @@ class RoboSystemsExtensions:
|
|
|
39
39
|
"timeout": config.timeout,
|
|
40
40
|
}
|
|
41
41
|
|
|
42
|
+
# Extract token from headers if it was set by auth classes
|
|
43
|
+
# The auth classes should set the token in a standard way
|
|
44
|
+
token = None
|
|
45
|
+
if config.headers:
|
|
46
|
+
# Check for Authorization Bearer token
|
|
47
|
+
auth_header = config.headers.get("Authorization", "")
|
|
48
|
+
if auth_header.startswith("Bearer "):
|
|
49
|
+
token = auth_header[7:]
|
|
50
|
+
# Check for X-API-Key
|
|
51
|
+
elif config.headers.get("X-API-Key"):
|
|
52
|
+
token = config.headers.get("X-API-Key")
|
|
53
|
+
|
|
54
|
+
# Pass token to child clients if available
|
|
55
|
+
if token:
|
|
56
|
+
self.config["token"] = token
|
|
57
|
+
|
|
42
58
|
# Initialize clients
|
|
43
59
|
self.copy = CopyClient(self.config)
|
|
44
60
|
self.query = QueryClient(self.config)
|