sdg-hub 0.7.2__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. sdg_hub/_version.py +2 -2
  2. sdg_hub/core/__init__.py +13 -1
  3. sdg_hub/core/blocks/__init__.py +11 -2
  4. sdg_hub/core/blocks/agent/__init__.py +6 -0
  5. sdg_hub/core/blocks/agent/agent_block.py +397 -0
  6. sdg_hub/core/blocks/base.py +4 -1
  7. sdg_hub/core/blocks/filtering/column_value_filter.py +2 -0
  8. sdg_hub/core/blocks/llm/__init__.py +3 -2
  9. sdg_hub/core/blocks/llm/llm_chat_block.py +2 -0
  10. sdg_hub/core/blocks/llm/{llm_parser_block.py → llm_response_extractor_block.py} +32 -9
  11. sdg_hub/core/blocks/llm/prompt_builder_block.py +2 -0
  12. sdg_hub/core/blocks/llm/text_parser_block.py +2 -0
  13. sdg_hub/core/blocks/transform/duplicate_columns.py +2 -0
  14. sdg_hub/core/blocks/transform/index_based_mapper.py +2 -0
  15. sdg_hub/core/blocks/transform/json_structure_block.py +2 -0
  16. sdg_hub/core/blocks/transform/melt_columns.py +2 -0
  17. sdg_hub/core/blocks/transform/rename_columns.py +12 -0
  18. sdg_hub/core/blocks/transform/text_concat.py +2 -0
  19. sdg_hub/core/blocks/transform/uniform_col_val_setter.py +2 -0
  20. sdg_hub/core/connectors/__init__.py +46 -0
  21. sdg_hub/core/connectors/agent/__init__.py +10 -0
  22. sdg_hub/core/connectors/agent/base.py +233 -0
  23. sdg_hub/core/connectors/agent/langflow.py +151 -0
  24. sdg_hub/core/connectors/base.py +99 -0
  25. sdg_hub/core/connectors/exceptions.py +41 -0
  26. sdg_hub/core/connectors/http/__init__.py +6 -0
  27. sdg_hub/core/connectors/http/client.py +150 -0
  28. sdg_hub/core/connectors/registry.py +112 -0
  29. sdg_hub/core/flow/base.py +7 -31
  30. sdg_hub/core/utils/flow_metrics.py +3 -3
  31. sdg_hub/flows/evaluation/rag/flow.yaml +6 -6
  32. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +4 -4
  33. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml +3 -3
  34. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +4 -4
  35. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +2 -2
  36. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +7 -7
  37. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml +7 -7
  38. sdg_hub/flows/text_analysis/structured_insights/flow.yaml +4 -4
  39. {sdg_hub-0.7.2.dist-info → sdg_hub-0.8.0.dist-info}/METADATA +2 -2
  40. {sdg_hub-0.7.2.dist-info → sdg_hub-0.8.0.dist-info}/RECORD +43 -32
  41. {sdg_hub-0.7.2.dist-info → sdg_hub-0.8.0.dist-info}/WHEEL +1 -1
  42. {sdg_hub-0.7.2.dist-info → sdg_hub-0.8.0.dist-info}/licenses/LICENSE +0 -0
  43. {sdg_hub-0.7.2.dist-info → sdg_hub-0.8.0.dist-info}/top_level.txt +0 -0
@@ -27,6 +27,8 @@ logger = setup_logger(__name__)
27
27
  "Renames columns in a dataset according to a mapping specification",
28
28
  )
29
29
  class RenameColumnsBlock(BaseBlock):
30
+ block_type: str = "transform"
31
+
30
32
  """Block for renaming columns in a dataset.
31
33
 
32
34
  This block renames columns in a dataset according to a mapping specification.
@@ -53,6 +55,16 @@ class RenameColumnsBlock(BaseBlock):
53
55
  )
54
56
  return v
55
57
 
58
def model_post_init(self, __context: Any) -> None:
    """Fill in derived attributes once Pydantic validation has finished.

    Parameters
    ----------
    __context : Any
        Context object forwarded unchanged to the parent hook.
    """
    # Delegate to the parent hook only when the parent actually provides one.
    parent = super()
    if hasattr(parent, "model_post_init"):
        parent.model_post_init(__context)

    # When the caller did not specify output columns, they default to the
    # new names, i.e. the values of the input_cols rename mapping.
    if self.output_cols is None:
        self.output_cols = list(self.input_cols.values())
67
+
56
68
  def generate(self, samples: pd.DataFrame, **kwargs: Any) -> pd.DataFrame:
57
69
  """Generate a dataset with renamed columns.
58
70
 
@@ -27,6 +27,8 @@ logger = setup_logger(__name__)
27
27
  "Combines multiple columns into a single column using a specified separator",
28
28
  )
29
29
  class TextConcatBlock(BaseBlock):
30
+ block_type: str = "transform"
31
+
30
32
  """Block for combining multiple columns into a single column.
31
33
 
32
34
  This block concatenates values from multiple columns into a single output column,
@@ -28,6 +28,8 @@ logger = setup_logger(__name__)
28
28
  "Replaces all values in a column with a single summary statistic (e.g., mode, mean, median)",
29
29
  )
30
30
  class UniformColumnValueSetter(BaseBlock):
31
+ block_type: str = "transform"
32
+
31
33
  """Block that replaces all values in a column with a single aggregate value.
32
34
 
33
35
  Supported strategies include: mode, min, max, mean, median.
@@ -0,0 +1,46 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """Connectors subsystem for external service integrations.
3
+
4
+ Example
5
+ -------
6
+ >>> from sdg_hub.core.connectors import (
7
+ ... ConnectorConfig,
8
+ ... ConnectorRegistry,
9
+ ... LangflowConnector,
10
+ ... )
11
+ >>>
12
+ >>> # Using the registry
13
+ >>> connector_class = ConnectorRegistry.get("langflow")
14
+ >>> config = ConnectorConfig(url="http://localhost:7860/api/v1/run/flow")
15
+ >>> connector = connector_class(config=config)
16
+ >>>
17
+ >>> # Direct instantiation
18
+ >>> connector = LangflowConnector(config=config)
19
+ >>> response = connector.send(
20
+ ... messages=[{"role": "user", "content": "Hello!"}],
21
+ ... session_id="session-123",
22
+ ... )
23
+ """
24
+
25
+ # Import agent module to register connectors
26
+ from .agent import BaseAgentConnector, LangflowConnector
27
+ from .base import BaseConnector, ConnectorConfig
28
+ from .exceptions import ConnectorError, ConnectorHTTPError
29
+ from .http import HttpClient
30
+ from .registry import ConnectorRegistry
31
+
32
+ __all__ = [
33
+ # Base classes
34
+ "BaseConnector",
35
+ "ConnectorConfig",
36
+ # Agent connectors
37
+ "BaseAgentConnector",
38
+ "LangflowConnector",
39
+ # Registry
40
+ "ConnectorRegistry",
41
+ # HTTP utilities
42
+ "HttpClient",
43
+ # Exceptions
44
+ "ConnectorError",
45
+ "ConnectorHTTPError",
46
+ ]
@@ -0,0 +1,10 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """Agent connector implementations."""
3
+
4
+ from .base import BaseAgentConnector
5
+ from .langflow import LangflowConnector
6
+
7
+ __all__ = [
8
+ "BaseAgentConnector",
9
+ "LangflowConnector",
10
+ ]
@@ -0,0 +1,233 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """Base class for agent framework connectors."""
3
+
4
+ from abc import abstractmethod
5
+ from typing import Any, Optional
6
+ import asyncio
7
+
8
+ from pydantic import PrivateAttr
9
+
10
+ from ...utils.logger_config import setup_logger
11
+ from ..base import BaseConnector
12
+ from ..exceptions import ConnectorError
13
+ from ..http import HttpClient
14
+
15
+ logger = setup_logger(__name__)
16
+
17
+
18
class BaseAgentConnector(BaseConnector):
    """Base class for agent framework connectors.

    This class provides a common interface for communicating with
    agent frameworks (Langflow, LangGraph, etc.). It uses an async-first
    pattern where the core logic is implemented once in async
    (``_send_async``), and the synchronous entry points are derived from it.

    Subclasses must implement:
    - build_request: Convert messages to framework-specific format
    - parse_response: Convert framework response to standard format

    Example
    -------
    >>> class MyAgentConnector(BaseAgentConnector):
    ...     def build_request(self, messages, session_id):
    ...         return {"input": messages[-1]["content"], "session": session_id}
    ...
    ...     def parse_response(self, response, extract_text=False):
    ...         return {"output": response["result"]}
    ...
    >>> connector = MyAgentConnector(config=ConnectorConfig(url="http://api"))
    >>> response = connector.send([{"role": "user", "content": "Hello"}], "session1")
    """

    # Lazily created by _get_http_client(); None until the first request.
    _http_client: Optional[HttpClient] = PrivateAttr(default=None)

    def _get_http_client(self) -> HttpClient:
        """Get or create the HTTP client.

        The client is built on first use from ``config.timeout`` and
        ``config.max_retries`` and then reused for later calls.

        Returns
        -------
        HttpClient
            The cached HTTP client instance.
        """
        # NOTE(review): the sync path of send() starts a fresh event loop per
        # call while this client is reused; this assumes HttpClient keeps no
        # loop-bound state between calls - confirm against HttpClient.
        if self._http_client is None:
            self._http_client = HttpClient(
                timeout=self.config.timeout,
                max_retries=self.config.max_retries,
            )
        return self._http_client

    def _build_headers(self) -> dict[str, str]:
        """Build HTTP headers for requests.

        Override in subclasses for framework-specific headers.

        Returns
        -------
        dict[str, str]
            HTTP headers to include in requests. Always contains a JSON
            ``Content-Type``; a ``Bearer`` ``Authorization`` header is added
            when ``config.api_key`` is set.
        """
        headers = {"Content-Type": "application/json"}
        if self.config.api_key:
            headers["Authorization"] = f"Bearer {self.config.api_key}"
        return headers

    @abstractmethod
    def build_request(
        self,
        messages: list[dict[str, Any]],
        session_id: str,
    ) -> dict[str, Any]:
        """Build framework-specific request payload.

        Parameters
        ----------
        messages : list[dict]
            List of messages in standard format:
            [{"role": "user", "content": "Hello"}, ...]
        session_id : str
            Session identifier for conversation tracking.

        Returns
        -------
        dict
            Framework-specific request payload.
        """

    @abstractmethod
    def parse_response(
        self, response: dict[str, Any], extract_text: bool = False
    ) -> Any:
        """Parse and validate framework response.

        Parameters
        ----------
        response : dict
            Raw response from the framework.
        extract_text : bool
            If True, extract just the text content. Default False returns full response.

        Returns
        -------
        dict or str
            Full response dict, or just the text if extract_text=True.

        Raises
        ------
        ConnectorError
            If the response is invalid or cannot be parsed.
        """

    async def _send_async(
        self,
        messages: list[dict[str, Any]],
        session_id: str,
    ) -> Any:
        """Core async implementation shared by ``send`` and ``asend``.

        Parameters
        ----------
        messages : list[dict]
            Messages to send to the agent.
        session_id : str
            Session identifier.

        Returns
        -------
        Any
            Parsed response from the agent (dict or str if extract_text=True).

        Raises
        ------
        ConnectorError
            If no URL is configured on the connector.
        """
        if not self.config.url:
            raise ConnectorError("No URL configured for connector")

        http_client = self._get_http_client()
        request = self.build_request(messages, session_id)
        headers = self._build_headers()

        logger.debug(f"Sending request to {self.config.url}")
        raw_response = await http_client.post(
            url=self.config.url,
            payload=request,
            headers=headers,
        )
        logger.debug(f"Received response from {self.config.url}")

        # getattr keeps this working with config objects that lack the field.
        extract_text = getattr(self.config, "extract_text", False)
        return self.parse_response(raw_response, extract_text=extract_text)

    def send(
        self,
        messages: list[dict[str, Any]],
        session_id: str,
        async_mode: bool = False,
    ) -> Any:
        """Send messages to the agent.

        Parameters
        ----------
        messages : list[dict]
            Messages to send, in format:
            [{"role": "user", "content": "Hello"}, ...]
        session_id : str
            Session identifier for conversation tracking.
        async_mode : bool, optional
            If True, returns a coroutine. If False (default), runs synchronously.

        Returns
        -------
        dict or Coroutine[dict]
            Response dict, or coroutine if async_mode=True.
        """
        if async_mode:
            return self._send_async(messages, session_id)

        # Probe for a running loop. Only the probe sits inside the try block:
        # a RuntimeError raised by the request itself must propagate to the
        # caller instead of being mistaken for "no event loop" and retried.
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No event loop running in this thread - create one.
            return asyncio.run(self._send_async(messages, session_id))

        # Already inside an async context: asyncio.run() cannot be nested in
        # this thread, so run the coroutine on a fresh loop in a worker
        # thread and block until it completes.
        import concurrent.futures

        with concurrent.futures.ThreadPoolExecutor() as executor:
            future = executor.submit(
                asyncio.run,
                self._send_async(messages, session_id),
            )
            return future.result()

    async def asend(
        self,
        messages: list[dict[str, Any]],
        session_id: str,
    ) -> Any:
        """Async send - convenience wrapper around ``_send_async``.

        Parameters
        ----------
        messages : list[dict]
            Messages to send.
        session_id : str
            Session identifier.

        Returns
        -------
        Any
            Response from the agent (dict or str if extract_text=True).
        """
        return await self._send_async(messages, session_id)

    def execute(self, request: dict[str, Any]) -> dict[str, Any]:
        """Execute a request (BaseConnector interface).

        Parameters
        ----------
        request : dict
            Request containing a required 'messages' key and an optional
            'session_id' key (falls back to "default" when absent).

        Returns
        -------
        dict
            Response from the agent.
        """
        return self.send(
            messages=request["messages"],
            session_id=request.get("session_id", "default"),
        )
@@ -0,0 +1,151 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """Langflow agent framework connector."""
3
+
4
+ from typing import Any
5
+
6
+ from ...utils.logger_config import setup_logger
7
+ from ..exceptions import ConnectorError
8
+ from ..registry import ConnectorRegistry
9
+ from .base import BaseAgentConnector
10
+
11
+ logger = setup_logger(__name__)
12
+
13
+
14
@ConnectorRegistry.register("langflow")
class LangflowConnector(BaseAgentConnector):
    """Connector for Langflow agent framework.

    Langflow is a visual framework for building LLM-powered applications.
    This connector handles the specific request/response format used by
    Langflow's API.

    Langflow expects:
    - Single string input (not message array)
    - Session ID for conversation tracking
    - Returns structured response with outputs

    Example
    -------
    >>> from sdg_hub.core.connectors import ConnectorConfig, LangflowConnector
    >>>
    >>> config = ConnectorConfig(
    ...     url="http://localhost:7860/api/v1/run/my-flow",
    ...     api_key="your-api-key",
    ... )
    >>> connector = LangflowConnector(config=config)
    >>> response = connector.send(
    ...     messages=[{"role": "user", "content": "Hello!"}],
    ...     session_id="session-123",
    ... )
    """

    def _build_headers(self) -> dict[str, str]:
        """Build headers for Langflow API.

        Langflow uses x-api-key header for authentication.

        Returns
        -------
        dict[str, str]
            HTTP headers: a JSON ``Content-Type`` plus ``x-api-key`` when
            ``config.api_key`` is set.
        """
        headers = {"Content-Type": "application/json"}
        if self.config.api_key:
            # Langflow uses x-api-key header
            headers["x-api-key"] = self.config.api_key
        return headers

    def build_request(
        self,
        messages: list[dict[str, Any]],
        session_id: str,
    ) -> dict[str, Any]:
        """Build Langflow-specific request payload.

        Langflow expects a single string input, not a message array.
        We extract the last user message content.

        Parameters
        ----------
        messages : list[dict]
            Messages in standard format.
        session_id : str
            Session identifier.

        Returns
        -------
        dict
            Langflow API request payload.

        Raises
        ------
        ConnectorError
            If ``messages`` contains no user message with content.
        """
        input_value = self._extract_last_user_message(messages)

        return {
            "output_type": "chat",
            "input_type": "chat",
            "input_value": input_value,
            "session_id": session_id,
        }

    def parse_response(
        self, response: dict[str, Any], extract_text: bool = False
    ) -> Any:
        """Parse Langflow response.

        Parameters
        ----------
        response : dict
            Raw response from Langflow API.
        extract_text : bool
            If True, extract just the text content. Default False returns full response.

        Returns
        -------
        dict or str
            Full response dict, or just the text if extract_text=True.

        Raises
        ------
        ConnectorError
            If response is not a valid dict or text extraction fails.
        """
        if not isinstance(response, dict):
            raise ConnectorError(
                f"Expected dict response, got {type(response).__name__}"
            )

        if not extract_text:
            return response

        try:
            # Text lives at outputs[0].outputs[0].results.message.text.
            return response["outputs"][0]["outputs"][0]["results"]["message"][
                "text"
            ]
        except (KeyError, IndexError, TypeError) as e:
            # Chain the cause so the missing key/index stays in the traceback.
            raise ConnectorError(
                f"Failed to extract text from response: {e}"
            ) from e

    def _extract_last_user_message(self, messages: list[dict[str, Any]]) -> str:
        """Extract the last user message content.

        Messages are scanned from newest to oldest; user messages with
        missing or empty content are skipped.

        Parameters
        ----------
        messages : list[dict]
            List of messages.

        Returns
        -------
        str
            Content of the last user message.

        Raises
        ------
        ConnectorError
            If no user message is found.
        """
        for msg in reversed(messages):
            if msg.get("role") == "user" and msg.get("content"):
                return msg["content"]

        raise ConnectorError(
            "No user message found in messages. "
            "Expected at least one message with role='user' and content."
        )
@@ -0,0 +1,99 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """Base connector classes for external service integrations."""
3
+
4
+ from abc import ABC, abstractmethod
5
+ from typing import Any, Optional
6
+
7
+ from pydantic import BaseModel, ConfigDict, Field
8
+
9
+ from ..utils.logger_config import setup_logger
10
+
11
+ logger = setup_logger(__name__)
12
+
13
+
14
class ConnectorConfig(BaseModel):
    """Base configuration for all connectors.

    Attributes
    ----------
    url : str, optional
        The base URL for the external service.
    api_key : str, optional
        API key for authentication.
    timeout : float
        Request timeout in seconds. Must be greater than 0. Default is 120.0.
    max_retries : int
        Maximum number of retry attempts. Must be 0 or more. Default is 3.
    extract_text : bool
        If True, extract just the text content from the agent response.
        Default is False.
    """

    url: Optional[str] = Field(None, description="Base URL for the service")
    api_key: Optional[str] = Field(None, description="API key for authentication")
    timeout: float = Field(120.0, description="Request timeout in seconds", gt=0)
    max_retries: int = Field(3, description="Maximum retry attempts", ge=0)
    extract_text: bool = Field(
        False,
        description="Extract just the text content from agent response",
    )

    # Fields not declared above are accepted rather than rejected.
    model_config = ConfigDict(extra="allow")
39
+
40
+
41
class BaseConnector(BaseModel, ABC):
    """Abstract base for every connector to an external service.

    Concrete connectors implement ``execute``; ``aexecute`` is provided
    with a thread-based default.

    Attributes
    ----------
    config : ConnectorConfig
        Configuration for the connector (required).

    Example
    -------
    >>> class MyConnector(BaseConnector):
    ...     def execute(self, request: dict) -> dict:
    ...         return {"result": request.get("input")}
    ...
    >>> connector = MyConnector(config=ConnectorConfig(url="http://example.com"))
    >>> result = connector.execute({"input": "test"})
    """

    config: ConnectorConfig = Field(..., description="Connector configuration")

    model_config = ConfigDict(arbitrary_types_allowed=True)

    @abstractmethod
    def execute(self, request: Any) -> Any:
        """Run a request synchronously.

        Parameters
        ----------
        request : Any
            The request to execute (format depends on connector type).

        Returns
        -------
        Any
            The response from the external service.
        """

    async def aexecute(self, request: Any) -> Any:
        """Run a request asynchronously.

        The default implementation hands the synchronous ``execute`` to a
        worker thread. Subclasses should override for true async support.

        Parameters
        ----------
        request : Any
            The request to execute.

        Returns
        -------
        Any
            The response from the external service.
        """
        import asyncio

        response = await asyncio.to_thread(self.execute, request)
        return response
@@ -0,0 +1,41 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """Exception classes for the connectors subsystem."""
3
+
4
+ from typing import Optional
5
+
6
+ from ..utils.error_handling import SDGHubError
7
+
8
+
9
class ConnectorError(SDGHubError):
    """Root exception for everything raised by the connectors subsystem.

    Raise this for general connector failures, including:
    - Configuration errors
    - Connection failures
    - Timeout errors
    - Response parsing errors
    """
20
+
21
+
22
class ConnectorHTTPError(ConnectorError):
    """Error raised when an HTTP request comes back with an error status.

    The URL and status code are kept on the instance as ``url`` and
    ``status_code``.

    Parameters
    ----------
    url : str
        The URL that returned an error.
    status_code : int
        The HTTP status code.
    message : str, optional
        Additional error details (e.g., response body).
    """

    def __init__(self, url: str, status_code: int, message: Optional[str] = None):
        self.url = url
        self.status_code = status_code
        summary = f"HTTP {status_code} error from '{url}'"
        # Append the detail text only when a non-empty message was supplied.
        super().__init__(f"{summary}: {message}" if message else summary)
@@ -0,0 +1,6 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """HTTP utilities for connectors."""
3
+
4
+ from .client import HttpClient
5
+
6
+ __all__ = ["HttpClient"]