fustor-fusion-sdk 0.1.2.post3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ Metadata-Version: 2.4
2
+ Name: fustor-fusion-sdk
3
+ Version: 0.1.2.post3
4
+ Summary: SDK for Fustor Fusion service
5
+ License-Expression: MIT
6
+ Requires-Dist: fustor-common
7
+ Requires-Dist: fustor-registry-client
@@ -0,0 +1,39 @@
1
+ # fustor-fusion-sdk
2
+
3
+ This package provides a Software Development Kit (SDK) for interacting with the Fustor Fusion service. It offers a client and interfaces to facilitate programmatic access and integration with the Fusion service's functionalities, such as data ingestion and processing.
4
+
5
+ ## Features
6
+
7
+ * **Client**: A Python client for making requests to the Fustor Fusion API.
8
+ * **Interfaces**: Defines abstract interfaces for various components of the Fustor Fusion service, allowing for consistent interaction patterns.
9
+
10
+ ## Installation
11
+
12
+ This package is part of the Fustor monorepo and is typically installed in editable mode within the monorepo's development environment using `uv sync`.
13
+
14
+ ## Usage
15
+
16
+ Developers can use this SDK to build custom applications or integrations that need to communicate with the Fustor Fusion service. It simplifies the process of sending data to Fusion and interacting with its processing capabilities.
17
+
18
+ Example (conceptual):
19
+
20
+ ```python
21
+ import asyncio
22
+ from fustor_fusion_sdk.client import FusionClient
23
+
24
+ async def main():
25
+     # FusionClient is asynchronous and requires the Fusion service URL and an API key
26
+     async with FusionClient(base_url="http://localhost:8102", api_key="your-api-key") as client:
27
+         session_id = await client.create_session(task_id="some-task-id")
28
+         if session_id is not None:  # create_session returns None on failure
29
+             events = [{"key": "value", "timestamp": 1678886400}]
30
+             # push_events returns True on success and False on failure
31
+             print(await client.push_events(session_id, events, source_type="example"))
32
+
33
+ asyncio.run(main())
34
+ ```
35
+
36
+ ## Dependencies
37
+
38
+ * `fustor-common`: Provides foundational elements and shared components.
39
+ * `fustor-registry-client`: Provides the `ClientDatastoreConfigResponse` model used by the datastore-config cache and processing-task-manager interfaces.
@@ -0,0 +1,22 @@
1
+ [project]
2
+ name = "fustor-fusion-sdk"
3
+ dynamic = ["version"]
4
+ description = "SDK for Fustor Fusion service"
5
+ license = "MIT"
6
+ dependencies = [ "fustor-common", "fustor-registry-client",]
7
+
8
+ [build-system]
9
+ requires = [ "setuptools>=61.0", "setuptools-scm>=8.0"]
10
+ build-backend = "setuptools.build_meta"
11
+
12
+ [tool.setuptools_scm]
13
+ root = "../.."
14
+ version_scheme = "post-release"
15
+ local_scheme = "dirty-tag"
16
+
17
+ [project.urls]
18
+ Homepage = "https://github.com/excelwang/fustor/tree/master/packages/fustor_fusion_sdk"
19
+ "Bug Tracker" = "https://github.com/excelwang/fustor/issues"
20
+
21
+ [tool.setuptools.packages.find]
22
+ where = [ "src",]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,101 @@
1
+
2
+ import httpx
3
+ import logging
4
+ from typing import Optional, List, Dict, Any
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
def contains_surrogate_characters(text: str) -> bool:
    """Report whether ``text`` fails strict UTF-8 encoding.

    Args:
        text: The string to inspect.

    Returns:
        True when encoding ``text`` as UTF-8 raises ``UnicodeEncodeError``
        (which is what lone surrogate code points cause), False otherwise.
    """
    try:
        text.encode('utf-8')
    except UnicodeEncodeError:
        has_surrogate = True
    else:
        has_surrogate = False
    return has_surrogate
15
+
16
def sanitize_surrogate_characters(obj: Any) -> Any:
    """Recursively replace lone surrogate characters so ``obj`` is UTF-8 safe.

    Strings that cannot be encoded as UTF-8 are re-encoded with
    ``errors='replace'``, which substitutes ``?`` for every unencodable
    code point. Dicts, lists and tuples are rebuilt with their contents
    sanitized; dict keys are sanitized as well as values, because a key
    containing a surrogate breaks serialization just like a value does.
    Any other type is returned unchanged.

    Args:
        obj: A string, a dict/list/tuple (arbitrarily nested), or any
            other value.

    Returns:
        The same string object when it is already encodable, a cleaned
        copy when it is not, a new container for dict/list/tuple input,
        or ``obj`` itself for every other type.

    Note:
        Two distinct keys that differ only in their surrogate characters
        collapse to the same sanitized key; the later one wins.
    """
    if isinstance(obj, str):
        try:
            obj.encode('utf-8')
        except UnicodeEncodeError:
            # Encode with replacement and decode back to drop the surrogates.
            return obj.encode('utf-8', errors='replace').decode('utf-8')
        return obj
    if isinstance(obj, dict):
        return {
            sanitize_surrogate_characters(key): sanitize_surrogate_characters(value)
            for key, value in obj.items()
        }
    if isinstance(obj, list):
        return [sanitize_surrogate_characters(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(sanitize_surrogate_characters(item) for item in obj)
    return obj
33
+
34
class FusionClient:
    """Asynchronous HTTP client for the Fusion ingestor API.

    Wraps an ``httpx.AsyncClient`` configured with the service base URL and
    an ``X-API-Key`` header. It can be used as an async context manager;
    leaving the context closes the underlying HTTP client.

    All request methods are best-effort: HTTP and other errors are logged
    and reported through the return value instead of being raised.
    """

    def __init__(self, base_url: str, api_key: str):
        """Create the client.

        Args:
            base_url: Base URL of the Fusion service.
            api_key: API key sent in the ``X-API-Key`` header of every request.
        """
        self.base_url = base_url
        self.api_key = api_key
        self.client = httpx.AsyncClient(base_url=self.base_url, headers={"X-API-Key": self.api_key})

    async def create_session(self, task_id: str) -> Optional[str]:
        """Create a new session and return its ID.

        Args:
            task_id: Task identifier sent in the request body.

        Returns:
            The ``session_id`` field of the JSON response, or None when the
            request fails or the field is absent.
        """
        try:
            # Sanitize task_id to handle any surrogate characters before JSON serialization
            payload = {"task_id": sanitize_surrogate_characters(task_id)}
            response = await self.client.post("/ingestor-api/v1/sessions/", json=payload)
            response.raise_for_status()
            return response.json().get("session_id")
        except httpx.HTTPStatusError as e:
            logger.error("HTTP error occurred: %s - %s", e.response.status_code, e.response.text)
            return None
        except Exception as e:
            # Deliberate best-effort: any failure is logged and reported as None.
            logger.error("An error occurred: %s", e)
            return None

    async def push_events(self, session_id: str, events: List[Dict[str, Any]], source_type: str) -> bool:
        """Push a batch of events to the Fusion service.

        Args:
            session_id: Session the events belong to.
            events: Event dictionaries to send.
            source_type: Source type label sent alongside the events.

        Returns:
            True when the service answers with a success status, False on
            any HTTP or other error.
        """
        try:
            # Sanitize every payload field to handle surrogate characters
            # before JSON serialization.
            payload = {
                "session_id": sanitize_surrogate_characters(session_id),
                "events": [sanitize_surrogate_characters(event) for event in events],
                "source_type": sanitize_surrogate_characters(source_type),
            }
            response = await self.client.post("/ingestor-api/v1/events/", json=payload)
            response.raise_for_status()
            return True
        except httpx.HTTPStatusError as e:
            logger.error(
                "HTTP error occurred during event push: %s - %s",
                e.response.status_code,
                e.response.text,
            )
            return False
        except Exception as e:
            logger.error("An error occurred during event push: %s", e)
            return False

    async def send_heartbeat(self, session_id: str) -> bool:
        """Send a heartbeat to the Fusion service to keep the session alive.

        Args:
            session_id: Session identifier, sent in the ``session-id`` header.

        Returns:
            True when the service answers with a success status, False on
            any HTTP or other error.
        """
        try:
            headers = {"session-id": session_id}
            response = await self.client.post("/ingestor-api/v1/sessions/heartbeat", headers=headers)
            response.raise_for_status()
            return True
        except httpx.HTTPStatusError as e:
            logger.error(
                "HTTP error occurred during heartbeat: %s - %s",
                e.response.status_code,
                e.response.text,
            )
            return False
        except Exception as e:
            logger.error("An error occurred during heartbeat: %s", e)
            return False

    async def __aenter__(self):
        """Enter the async context and return this client."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the underlying HTTP client when the context exits."""
        await self.client.aclose()
@@ -0,0 +1,92 @@
1
+ from typing import Dict, Optional, List, Any, Protocol
2
+ from dataclasses import dataclass
3
+ import asyncio
4
+
5
@dataclass
class SessionInfo:
    """Record describing a single session of a datastore.

    The first four fields are required; the remaining ones default to None.
    """

    # --- required fields ---
    session_id: str
    datastore_id: int
    last_activity: float  # presumably a timestamp - TODO confirm units with the session manager
    created_at: float  # presumably a timestamp - TODO confirm units with the session manager

    # --- optional fields (all default to None) ---
    task_id: Optional[str] = None
    allow_concurrent_push: Optional[bool] = None
    session_timeout_seconds: Optional[int] = None
    client_ip: Optional[str] = None
    cleanup_task: Optional[asyncio.Task] = None
16
+
17
class ApiKeyCacheInterface(Protocol):
    """Structural interface for an API key cache.

    Implementers provide a way to load API key records and to resolve an
    API key to a datastore ID.
    """

    def set_cache(
        self,
        api_keys_data: List[Dict[str, Any]],
    ):
        """Receive the API key records, given as a list of dictionaries.

        The dictionary schema is not defined by this interface.
        """
        ...

    def get_datastore_id(
        self,
        api_key: str,
    ) -> Optional[int]:
        """Return the datastore ID for ``api_key``, or None."""
        ...
26
+
27
+ from fustor_common.models import DatastoreConfig
28
+ from fustor_registry_client.models import ClientDatastoreConfigResponse
29
+
30
class DatastoreConfigCacheInterface(Protocol):
    """Structural interface for a datastore configuration cache.

    Implementers accept configuration responses and hand back a
    ``DatastoreConfig`` for a given datastore ID.
    """

    def set_cache(
        self,
        datastore_configs_data: List[ClientDatastoreConfigResponse],
    ):
        """Receive the datastore configurations as a list of responses."""
        ...

    def get_datastore_config(
        self,
        datastore_id: int,
    ) -> Optional[DatastoreConfig]:
        """Return the configuration for ``datastore_id``, or None."""
        ...
39
+
40
+ from fustor_registry_client.models import ClientDatastoreConfigResponse
41
+
42
class ParserProcessingTaskManagerInterface(Protocol):
    """Structural interface for managing datastore processing tasks.

    Every method is a coroutine; none declares a return type.
    """

    async def start_processing_for_datastore(
        self,
        datastore_id: int,
    ):
        """Start processing for the datastore identified by ``datastore_id``."""
        ...

    async def stop_processing_for_datastore(
        self,
        datastore_id: int,
    ):
        """Stop processing for the datastore identified by ``datastore_id``."""
        ...

    async def sync_tasks(
        self,
        latest_datastore_configs: List[ClientDatastoreConfigResponse],
    ):
        """Synchronize tasks against the latest datastore configurations.

        NOTE(review): the reconciliation rules are left to the implementer.
        """
        ...

    async def shutdown(self):
        """Shut the task manager down."""
        ...
57
+
58
class SessionManagerInterface(Protocol):
    """Structural interface for managing user sessions.

    Every method is a coroutine. Sessions are addressed by a
    ``(datastore_id, session_id)`` pair.
    """

    async def create_session_entry(
        self,
        datastore_id: int,
        session_id: str,
        task_id: Optional[str] = None,
        client_ip: Optional[str] = None,
        allow_concurrent_push: Optional[bool] = None,
        session_timeout_seconds: Optional[int] = None,
    ) -> SessionInfo:
        """Create a session entry and return its ``SessionInfo``."""
        ...

    async def keep_session_alive(
        self,
        datastore_id: int,
        session_id: str,
        client_ip: Optional[str] = None,
    ) -> Optional[SessionInfo]:
        """Keep the given session alive; returns its ``SessionInfo`` or None."""
        ...

    async def get_session_info(
        self,
        datastore_id: int,
        session_id: str,
    ) -> Optional[SessionInfo]:
        """Return the ``SessionInfo`` for the given session, or None."""
        ...

    async def get_datastore_sessions(
        self,
        datastore_id: int,
    ) -> Dict[str, SessionInfo]:
        """Return the sessions of a datastore as a ``str -> SessionInfo`` mapping.

        NOTE(review): keys are presumably session IDs - confirm with implementers.
        """
        ...

    async def remove_session(
        self,
        datastore_id: int,
        session_id: str,
    ) -> bool:
        """Remove the given session; the boolean result reports the outcome."""
        ...

    async def cleanup_expired_sessions(self):
        """Clean up expired sessions."""
        ...

    async def terminate_session(
        self,
        datastore_id: int,
        session_id: str,
    ) -> bool:
        """Terminate the given session; the boolean result reports the outcome."""
        ...

    async def start_periodic_cleanup(
        self,
        interval_seconds: int = 60,
    ):
        """Start periodic cleanup, with an interval that defaults to 60 seconds."""
        ...

    async def stop_periodic_cleanup(self):
        """Stop periodic cleanup."""
        ...
@@ -0,0 +1,7 @@
1
+ Metadata-Version: 2.4
2
+ Name: fustor-fusion-sdk
3
+ Version: 0.1.2.post3
4
+ Summary: SDK for Fustor Fusion service
5
+ License-Expression: MIT
6
+ Requires-Dist: fustor-common
7
+ Requires-Dist: fustor-registry-client
@@ -0,0 +1,11 @@
1
+ README.md
2
+ pyproject.toml
3
+ src/fustor_fusion_sdk/__init__.py
4
+ src/fustor_fusion_sdk/client.py
5
+ src/fustor_fusion_sdk/interfaces.py
6
+ src/fustor_fusion_sdk.egg-info/PKG-INFO
7
+ src/fustor_fusion_sdk.egg-info/SOURCES.txt
8
+ src/fustor_fusion_sdk.egg-info/dependency_links.txt
9
+ src/fustor_fusion_sdk.egg-info/requires.txt
10
+ src/fustor_fusion_sdk.egg-info/top_level.txt
11
+ tests/test_sdk_surrogate_handling.py
@@ -0,0 +1,2 @@
1
+ fustor-common
2
+ fustor-registry-client
@@ -0,0 +1,147 @@
1
+ """
2
+ Test case to verify that the fustor_fusion_sdk correctly handles surrogate characters
3
+ """
4
+ import pytest
5
+ import asyncio
6
+ from unittest.mock import AsyncMock, patch
7
+ from fustor_fusion_sdk.client import FusionClient, sanitize_surrogate_characters
8
+
9
+
10
def test_sanitize_surrogate_characters_string():
    """A lone surrogate in a string is replaced while the rest is preserved."""
    test_str = "valid_text_\udca3_invalid_surrogate"
    result = sanitize_surrogate_characters(test_str)

    # str.encode(..., errors='replace') substitutes "?" for each unencodable
    # code point, so the exact output can be pinned. (Checking `"" in result`
    # would pass for any string and verify nothing.)
    assert result == "valid_text_?_invalid_surrogate"
    assert "_invalid_surrogate" in result  # Valid part remains
    assert "\udca3" not in result  # Original surrogate char is gone
21
+
22
+
23
def test_sanitize_surrogate_characters_dict():
    """Dictionary values with surrogates are cleaned; clean values are untouched."""
    test_dict = {
        "valid_key": "valid_value",
        "key_with_surrogate": "path_with_\udca3_surrogate",
        "another_valid": "value_without_surrogates",
    }

    result = sanitize_surrogate_characters(test_dict)

    # Valid entries should remain unchanged
    assert result["valid_key"] == "valid_value"
    assert result["another_valid"] == "value_without_surrogates"

    # The entry with a surrogate is cleaned: encode(errors='replace') yields "?".
    # An exact comparison replaces the always-true `"" in ...` check.
    assert result["key_with_surrogate"] == "path_with_?_surrogate"
    assert "\udca3" not in result["key_with_surrogate"]
41
+
42
+
43
def test_sanitize_surrogate_characters_nested():
    """Surrogates are cleaned at every depth of a nested dict/list structure."""
    test_data = {
        "level1": {
            "level2": [
                "normal_string",
                "string_with_\udca3_surrogate",
                {"nested_key": "nested_\udcb3_value"},
            ]
        }
    }

    result = sanitize_surrogate_characters(test_data)
    level2 = result["level1"]["level2"]

    # Deeply nested values are cleaned; each surrogate becomes "?".
    # Exact comparisons replace the always-true `"" in ...` checks.
    assert level2[1] == "string_with_?_surrogate"
    assert level2[2]["nested_key"] == "nested_?_value"
    assert "\udca3" not in level2[1]
    assert "\udcb3" not in level2[2]["nested_key"]

    # Valid strings remain unchanged
    assert level2[0] == "normal_string"
65
+
66
+
67
def test_sanitize_surrogate_characters_no_surrogates():
    """A string without surrogates comes back equal to the input."""
    clean_text = "normal_string_without_surrogates"
    sanitized = sanitize_surrogate_characters(clean_text)
    # Nothing to replace, so the value must be unchanged.
    assert sanitized == clean_text
72
+
73
+
74
def test_sanitize_surrogate_characters_other_types():
    """Non-string values pass through unchanged while strings are cleaned."""
    test_data = {
        "string_val": "with_\udca3_surrogate",
        "int_val": 42,
        "bool_val": True,
        "none_val": None,
        "list_val": ["item1", "item2_\udca4_with_surrogate"],
    }

    result = sanitize_surrogate_characters(test_data)

    # Non-string types should remain unchanged
    assert result["int_val"] == 42
    assert result["bool_val"] is True
    assert result["none_val"] is None

    # Strings are cleaned: each surrogate becomes "?". Exact comparisons
    # replace the always-true `"" in ...` checks.
    assert result["string_val"] == "with_?_surrogate"
    assert result["list_val"] == ["item1", "item2_?_with_surrogate"]
94
+
95
+
96
@pytest.mark.asyncio
async def test_push_events_with_surrogate_characters():
    """push_events sanitizes events containing surrogate characters before sending."""
    # Mock the HTTP response to avoid actual network calls
    mock_response = AsyncMock()
    # In httpx, raise_for_status is a sync method, so it is replaced by a plain callable
    mock_response.raise_for_status = lambda: None
    mock_response.json.return_value = {"session_id": "test-session"}

    test_events = [
        {
            "event_type": "update",
            "event_schema": "test_schema",
            "table": "test_table",
            "rows": [{"path": "/valid/path/with_\udca3_surrogate/file.txt", "size": 100}],
        }
    ]

    # Use the client as a context manager so its underlying httpx.AsyncClient
    # is closed when the test finishes.
    async with FusionClient("http://test.com", "fake-api-key") as client:
        with patch.object(client.client, 'post', return_value=mock_response) as mock_post:
            # This should not raise an exception due to surrogate characters
            result = await client.push_events("test_session", test_events, "test_source")

            # Verify that the call was made
            assert result is True
            mock_post.assert_called_once()

            # Inspect the JSON payload passed to the post call
            payload = mock_post.call_args.kwargs['json']
            assert payload['session_id'] == "test_session"
            assert payload['source_type'] == "test_source"

            # The surrogate is replaced with "?" (encode with errors='replace');
            # an exact comparison replaces the always-true `"" in ...` check.
            event_path = payload['events'][0]['rows'][0]['path']
            assert event_path == "/valid/path/with_?_surrogate/file.txt"
            assert "\udca3" not in event_path  # Original surrogate should be removed
133
+
134
+
135
+
136
def test_contains_surrogate_characters_detection():
    """contains_surrogate_characters flags surrogates and accepts clean or empty text."""
    from fustor_fusion_sdk.client import contains_surrogate_characters

    expectations = (
        ("test_\udca3_string", True),  # lone surrogate present
        ("test_string_without_surrogates", False),  # plain text
        ("", False),  # empty string
    )
    for text, expected in expectations:
        assert contains_surrogate_characters(text) is expected