hb-eval-sdk 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Abuelgasim Mohamed Ibrahim Adam
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,123 @@
1
+ Metadata-Version: 2.4
2
+ Name: hb-eval-sdk
3
+ Version: 2.0.0
4
+ Summary: HB-Eval SDK for reliable agent evaluation, semantic memory, and LangChain/LangGraph integration
5
+ Author-email: Abuelgasim Mohamed Ibrahim Adam <abuelgasim.hbeval@outlook.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/hb-evalSystem/HB-System
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
13
+ Requires-Python: >=3.8
14
+ Description-Content-Type: text/markdown
15
+ License-File: LICENSE
16
+ Requires-Dist: requests>=2.28.0
17
+ Requires-Dist: cryptography>=41.0.0
18
+ Requires-Dist: pydantic>=2.0.0
19
+ Requires-Dist: langchain-core>=0.1.0
20
+ Requires-Dist: langgraph>=0.0.10
21
+ Dynamic: license-file
22
+
23
+ # HB-Eval SDK
24
+
25
+ Secure Python SDK for **HB-Eval Reliability OS** — the first cognitive runtime for evaluating and ensuring AI agent reliability.
26
+
27
+ ## Features
28
+
29
+ - AES-256-GCM encrypted payloads for all API communication
30
+ - HMAC-SHA256 request signing with replay-attack protection
31
+ - Safe Halt protocol — automatic failure handling on connection loss
32
+ - Semantic memory retrieval for agent context
33
+ - Native LangChain / LangGraph callback integration
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ pip install hb-eval-sdk
39
+ ```
40
+
41
+ ## Quick Start
42
+
43
+ ```python
44
+ from hb_eval_sdk import HBEvalClient
45
+
46
+ client = HBEvalClient(
47
+ api_key="your_api_key",
48
+ aes_key="your_base64_encoded_32byte_aes_key",
49
+ )
50
+
51
+ result = client.evaluate({
52
+ "trajectory": [{"step": 1, "action": "query_memory", "input": "..."}],
53
+ "sub_tasks": 1,
54
+ "constraint_violations": 0,
55
+ "recovery_attempts": 1,
56
+ "context": "my agent task",
57
+ "agent_id": "my-agent",
58
+ })
59
+
60
+ print(result.verdict) # Verdict.SAFE or Verdict.UNSAFE
61
+ print(result.metrics.pei) # float
62
+ print(result.metrics.irs) # float
63
+ ```
64
+
65
+ ## Memory Retrieval
66
+
67
+ ```python
68
+ matches = client.retrieve_memory(context="my task context")
69
+ for match in matches:
70
+ print(match.trajectory_summary, match.similarity)
71
+ ```
72
+
73
+ ## LangChain Integration
74
+
75
+ ```python
76
+ from hb_eval_sdk import HBEvalCallback
77
+
78
+ callback = HBEvalCallback(
79
+ api_key="your_api_key",
80
+ aes_key="your_base64_encoded_32byte_aes_key",
81
+ )
82
+
83
+ # Pass the callback to your LangChain agent
84
+ agent.invoke({"input": "..."}, config={"callbacks": [callback]})
85
+ ```
86
+
87
+ ## Exception Handling
88
+
89
+ ```python
90
+ from hb_eval_sdk import (
91
+ HBEvalError,
92
+ AuthenticationError,
93
+ EncryptionError,
94
+ SafeHaltError,
95
+ ConnectionError,
96
+ )
97
+
98
+ try:
99
+ result = client.evaluate({...})
100
+ except SafeHaltError as e:
101
+ print(f"Safe halt triggered: {e.reason}")
102
+ except AuthenticationError as e:
103
+ print(f"Auth failed: {e}")
104
+ except ConnectionError as e:
105
+ print(f"Connection error: {e}")
106
+ ```
107
+
108
+ ## Requirements
109
+
110
+ - Python >= 3.8
111
+ - requests >= 2.28.0
112
+ - cryptography >= 41.0.0
113
+ - pydantic >= 2.0.0
114
+ - langchain-core >= 0.1.0
115
+ - langgraph >= 0.0.10
116
+
117
+ ## Documentation
118
+
119
+ Full documentation: https://hb-eval.readthedocs.io
120
+
121
+ ## License
122
+
123
+ MIT License — Copyright (c) 2026 Abuelgasim Mohamed Ibrahim Adam
@@ -0,0 +1,101 @@
1
+ # HB-Eval SDK
2
+
3
+ Secure Python SDK for **HB-Eval Reliability OS** — the first cognitive runtime for evaluating and ensuring AI agent reliability.
4
+
5
+ ## Features
6
+
7
+ - AES-256-GCM encrypted payloads for all API communication
8
+ - HMAC-SHA256 request signing with replay-attack protection
9
+ - Safe Halt protocol — automatic failure handling on connection loss
10
+ - Semantic memory retrieval for agent context
11
+ - Native LangChain / LangGraph callback integration
12
+
13
+ ## Installation
14
+
15
+ ```bash
16
+ pip install hb-eval-sdk
17
+ ```
18
+
19
+ ## Quick Start
20
+
21
+ ```python
22
+ from hb_eval_sdk import HBEvalClient
23
+
24
+ client = HBEvalClient(
25
+ api_key="your_api_key",
26
+ aes_key="your_base64_encoded_32byte_aes_key",
27
+ )
28
+
29
+ result = client.evaluate({
30
+ "trajectory": [{"step": 1, "action": "query_memory", "input": "..."}],
31
+ "sub_tasks": 1,
32
+ "constraint_violations": 0,
33
+ "recovery_attempts": 1,
34
+ "context": "my agent task",
35
+ "agent_id": "my-agent",
36
+ })
37
+
38
+ print(result.verdict) # Verdict.SAFE or Verdict.UNSAFE
39
+ print(result.metrics.pei) # float
40
+ print(result.metrics.irs) # float
41
+ ```
42
+
43
+ ## Memory Retrieval
44
+
45
+ ```python
46
+ matches = client.retrieve_memory(context="my task context")
47
+ for match in matches:
48
+ print(match.trajectory_summary, match.similarity)
49
+ ```
50
+
51
+ ## LangChain Integration
52
+
53
+ ```python
54
+ from hb_eval_sdk import HBEvalCallback
55
+
56
+ callback = HBEvalCallback(
57
+ api_key="your_api_key",
58
+ aes_key="your_base64_encoded_32byte_aes_key",
59
+ )
60
+
61
+ # Pass the callback to your LangChain agent
62
+ agent.invoke({"input": "..."}, config={"callbacks": [callback]})
63
+ ```
64
+
65
+ ## Exception Handling
66
+
67
+ ```python
68
+ from hb_eval_sdk import (
69
+ HBEvalError,
70
+ AuthenticationError,
71
+ EncryptionError,
72
+ SafeHaltError,
73
+ ConnectionError,
74
+ )
75
+
76
+ try:
77
+ result = client.evaluate({...})
78
+ except SafeHaltError as e:
79
+ print(f"Safe halt triggered: {e.reason}")
80
+ except AuthenticationError as e:
81
+ print(f"Auth failed: {e}")
82
+ except ConnectionError as e:
83
+ print(f"Connection error: {e}")
84
+ ```
85
+
86
+ ## Requirements
87
+
88
+ - Python >= 3.8
89
+ - requests >= 2.28.0
90
+ - cryptography >= 41.0.0
91
+ - pydantic >= 2.0.0
92
+ - langchain-core >= 0.1.0
93
+ - langgraph >= 0.0.10
94
+
95
+ ## Documentation
96
+
97
+ Full documentation: https://hb-eval.readthedocs.io
98
+
99
+ ## License
100
+
101
+ MIT License — Copyright (c) 2026 Abuelgasim Mohamed Ibrahim Adam
@@ -0,0 +1,24 @@
1
+ from .client import HBEvalClient
2
+ from .langchain_integration import HBEvalCallback
3
+ from .models import EvaluationResult, MemoryMatch
4
+ from .exceptions import (
5
+ HBEvalError,
6
+ ConnectionError,
7
+ AuthenticationError,
8
+ EncryptionError,
9
+ SafeHaltError,
10
+ )
11
+
12
# Package version string; matches the version in the packaging metadata.
__version__ = "2.0.0"

# Names re-exported as the public API of the package.
__all__ = [
    # Entry points
    "HBEvalClient",
    "HBEvalCallback",
    # Result models
    "EvaluationResult",
    "MemoryMatch",
    # Exception hierarchy
    "HBEvalError",
    "ConnectionError",
    "AuthenticationError",
    "EncryptionError",
    "SafeHaltError",
]
@@ -0,0 +1,112 @@
1
+ import json
2
+ import time
3
+ import hashlib
4
+ import base64
5
+ import requests
6
+ from cryptography.hazmat.primitives.ciphers.aead import AESGCM
7
+ from .exceptions import ConnectionError, AuthenticationError, EncryptionError, SafeHaltError
8
+ from .models import EvaluationResult, MemoryMatch, Metrics, Verdict
9
+ from .utils import generate_nonce, generate_timestamp, compute_hmac
10
+ from . import config
11
+
12
+
13
class HBEvalClient:
    """HTTP client for the HB-Eval gateway.

    Each request body is a JSON payload encrypted with AES-GCM, and each
    request carries an HMAC-SHA256 signature computed over the string
    ``"<timestamp>.<nonce>"``.

    Args:
        api_key: API key, sent as a bearer token. Its SHA-256 digest is used
            as the HMAC signing secret.
        aes_key: Base64 text that must decode to exactly 32 bytes.
        gateway_url: Base URL of the gateway. Falls back to
            ``config.DEFAULT_GATEWAY_URL``; trailing slashes are stripped.
        timeout: Timeout handed to ``requests``. A falsy value falls back to
            ``config.DEFAULT_TIMEOUT``.
        max_retries: Number of additional attempts ``evaluate`` makes after a
            timeout or connection failure. ``None`` falls back to
            ``config.DEFAULT_MAX_RETRIES``; ``0`` disables retries.

    Raises:
        EncryptionError: If ``aes_key`` cannot be base64-decoded or does not
            decode to 32 bytes.
    """

    def __init__(
        self,
        api_key: str,
        aes_key: str,
        gateway_url: str = None,
        timeout: int = None,
        max_retries: int = None,
    ):
        self.api_key = api_key
        self.timeout = timeout or config.DEFAULT_TIMEOUT
        # Compare against None instead of using ``or`` so that an explicit
        # ``max_retries=0`` is honoured rather than replaced by the default.
        self.max_retries = (
            config.DEFAULT_MAX_RETRIES if max_retries is None else max_retries
        )
        self.gateway_url = (gateway_url or config.DEFAULT_GATEWAY_URL).rstrip("/")
        try:
            self.aes_key = base64.b64decode(aes_key)
            if len(self.aes_key) != 32:
                raise ValueError("AES key must be 32 bytes after base64 decoding.")
        except Exception as e:
            # Keep the original failure attached as the cause for debugging.
            raise EncryptionError(f"Invalid AES key: {e}") from e
        self.hmac_secret = hashlib.sha256(self.api_key.encode()).digest()

    def _encrypt(self, payload: dict):
        """Encrypt ``payload`` as JSON with AES-GCM under a fresh nonce.

        Returns:
            A ``(nonce_hex, ciphertext_hex)`` tuple of strings.
        """
        nonce_hex = generate_nonce(12)
        # NOTE(review): the nonce given to AES-GCM is the UTF-8 encoding of the
        # hex text (24 bytes for the 12 requested here), not the 12 raw bytes.
        # Presumably the gateway decrypts with the same bytes, so this is part
        # of the wire format and must not change without a matching server
        # change.
        nonce_bytes = nonce_hex.encode("utf-8")
        aesgcm = AESGCM(self.aes_key)
        plaintext = json.dumps(payload).encode("utf-8")
        ciphertext = aesgcm.encrypt(nonce_bytes, plaintext, None)
        return nonce_hex, ciphertext.hex()

    def _build_headers(self, nonce: str, timestamp: str) -> dict:
        """Build the request headers, including the HMAC signature.

        The signature covers ``"<timestamp>.<nonce>"`` only; the request body
        is not part of the signed string.
        """
        signature = compute_hmac(self.hmac_secret, f"{timestamp}.{nonce}")
        return {
            "Authorization": f"Bearer {self.api_key}",
            "X-HBEval-Nonce": nonce,
            "X-HBEval-Timestamp": timestamp,
            "X-HBEval-Signature": signature,
            "Content-Type": "application/json",
        }

    def evaluate(self, payload: dict, max_retries: int = None) -> EvaluationResult:
        """Submit ``payload`` to ``<gateway_url>/evaluate`` and parse the reply.

        Args:
            payload: JSON-serialisable dictionary to encrypt and send.
            max_retries: Per-call override of the instance retry count.
                ``None`` uses ``self.max_retries``; negative values are
                treated as ``0``.

        Returns:
            The parsed ``EvaluationResult``. A missing ``verdict`` defaults to
            ``"UNSAFE"`` and missing ``metrics`` default to zeros.

        Raises:
            AuthenticationError: On HTTP 401 or 403.
            SafeHaltError: When every attempt ended in a ``requests`` timeout
                or connection error.
            ConnectionError: On any other non-200 status, or on any other
                unexpected failure while building, sending or parsing.
        """
        retries = self.max_retries if max_retries is None else max_retries
        # A negative count would leave the loop body unexecuted and make the
        # method fall through returning None; treat it as "no retries".
        retries = max(retries, 0)
        for attempt in range(retries + 1):
            try:
                # Encrypt inside the loop so each attempt has a fresh nonce
                # and timestamp.
                nonce, ciphertext = self._encrypt(payload)
                timestamp = generate_timestamp()
                headers = self._build_headers(nonce, timestamp)
                response = requests.post(
                    f"{self.gateway_url}/evaluate",
                    json={"ciphertext": ciphertext},
                    headers=headers,
                    timeout=self.timeout,
                )
                if response.status_code == 401:
                    raise AuthenticationError("Invalid API key or signature.")
                if response.status_code == 403:
                    raise AuthenticationError("Replay attack detected or request expired.")
                if response.status_code != 200:
                    raise ConnectionError(
                        f"Gateway error {response.status_code}: {response.text}"
                    )
                data = response.json()
                metrics_data = data.get("metrics", {"pei": 0.0, "irs": 0.0})
                return EvaluationResult(
                    verdict=Verdict(data.get("verdict", "UNSAFE")),
                    metrics=Metrics(**metrics_data),
                    safe_halt=data.get("safe_halt", False),
                    reason=data.get("reason", None),
                )
            except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
                if attempt < retries:
                    # Linear back-off: 1s, 2s, 3s, ...
                    time.sleep(attempt + 1)
                    continue
                raise SafeHaltError(
                    f"Connection lost after {retries + 1} attempt(s): {e}"
                ) from e
            except (AuthenticationError, EncryptionError, ConnectionError):
                # SDK errors raised above already carry their final message.
                # ``ConnectionError`` here is the SDK class imported from
                # ``.exceptions``; without it in this tuple the gateway error
                # would be re-wrapped as "Unexpected error" below.
                raise
            except Exception as e:
                raise ConnectionError(f"Unexpected error during evaluation: {e}") from e

    def retrieve_memory(self, context: str, project_id: str = None) -> list:
        """Query ``<gateway_url>/api/v1/memory/retrieve`` for stored matches.

        Args:
            context: Text sent as the ``context`` field of the encrypted
                payload.
            project_id: Project identifier; ``None`` falls back to
                ``config.DEFAULT_PROJECT_ID``.

        Returns:
            A list of ``MemoryMatch`` built from the ``matches`` entry of the
            response (empty when that entry is absent).

        Raises:
            ConnectionError: On any non-200 status, including 401 and 403.

        NOTE(review): unlike ``evaluate``, this method makes a single attempt
        and lets exceptions raised by ``requests`` propagate unchanged.
        """
        if project_id is None:
            project_id = config.DEFAULT_PROJECT_ID
        nonce, ciphertext = self._encrypt(
            {"context": context, "project_id": project_id}
        )
        timestamp = generate_timestamp()
        headers = self._build_headers(nonce, timestamp)
        response = requests.post(
            f"{self.gateway_url}/api/v1/memory/retrieve",
            json={"ciphertext": ciphertext},
            headers=headers,
            timeout=self.timeout,
        )
        if response.status_code != 200:
            raise ConnectionError(
                f"Memory retrieval failed with status {response.status_code}: {response.text}"
            )
        data = response.json()
        return [MemoryMatch(**m) for m in data.get("matches", [])]
@@ -0,0 +1,4 @@
1
# Gateway base URL used when the caller does not supply one.
DEFAULT_GATEWAY_URL = "https://hbeval-reliability-os-production.up.railway.app"

# Timeout value passed to ``requests`` when the caller gives none.
DEFAULT_TIMEOUT = 10

# Number of extra attempts after the first failed one.
DEFAULT_MAX_RETRIES = 2

# Project identifier sent with memory-retrieval requests by default.
DEFAULT_PROJECT_ID = "00000000-0000-0000-0000-000000000001"
@@ -0,0 +1,26 @@
1
class HBEvalError(Exception):
    """Root of the HB-Eval SDK exception hierarchy.

    Catching this type catches every error the SDK defines.
    """
4
+
5
+
6
class ConnectionError(HBEvalError):
    """Signals a failed exchange with the Gateway.

    Note that, wherever it is imported, this name shadows the built-in
    ``ConnectionError``.
    """
9
+
10
+
11
class AuthenticationError(HBEvalError):
    """Signals that the API key or the request signature was rejected."""
14
+
15
+
16
class EncryptionError(HBEvalError):
    """Signals a failure while encrypting or decrypting data."""
19
+
20
+
21
class SafeHaltError(HBEvalError):
    """Signals that the Safe Halt protocol has been activated.

    Attributes:
        reason: Machine-readable halt code supplied by the raiser.
    """

    def __init__(self, message: str, reason: str = "CONNECTION_LOST_MANDATORY_HALT"):
        self.reason = reason
        super().__init__(message)
@@ -0,0 +1,76 @@
1
+ import time
2
+ from typing import Dict, Any, List, Optional
3
+
4
+ from langchain_core.callbacks.base import BaseCallbackHandler
5
+
6
+ from .client import HBEvalClient
7
+
8
+
9
class HBEvalCallback(BaseCallbackHandler):
    """LangChain callback handler that evaluates agent trajectories via HB-Eval.

    The handler records the start of the top-level chain, each tool call and
    the end of the top-level chain, then submits the recorded trajectory
    through ``HBEvalClient.evaluate``.

    Attributes:
        client: The ``HBEvalClient`` used to submit trajectories.
        trajectory: Steps recorded for the current top-level run.
        start_time: ``time.time()`` at the start of the current run.
        last_result: Value returned by the most recent ``evaluate`` call, or
            ``None`` if no evaluation has completed yet.
    """

    def __init__(
        self,
        api_key: str,
        aes_key: str,
        gateway_url: Optional[str] = None,
    ):
        super().__init__()
        self.client = HBEvalClient(api_key, aes_key, gateway_url)
        self.trajectory: List[Dict[str, Any]] = []
        self.start_time: float = 0.0
        # Kept so callers can inspect the verdict after the run finishes.
        self.last_result: Optional[Any] = None

    def on_chain_start(
        self,
        serialized: Dict[str, Any],
        inputs: Dict[str, Any],
        **kwargs: Any,
    ) -> None:
        """Begin a new trajectory when the top-level chain starts.

        Nested runs (those invoked with a non-``None`` ``parent_run_id``) are
        ignored, so a sub-chain cannot wipe the steps recorded so far.
        """
        if kwargs.get("parent_run_id") is not None:
            return
        self.start_time = time.time()
        self.trajectory = [
            {
                "step": 1,
                "action": "chain_start",
                "input": str(inputs)[:500],
            }
        ]

    def on_tool_start(
        self,
        serialized: Dict[str, Any],
        input_str: str,
        **kwargs: Any,
    ) -> None:
        """Record a tool invocation as the next trajectory step."""
        # ``serialized`` is annotated as a dict but is guarded against None so
        # that a missing description degrades to "unknown" instead of raising.
        tool_name = (serialized or {}).get("name", "unknown")
        self.trajectory.append(
            {
                "step": len(self.trajectory) + 1,
                "action": "tool_call",
                "tool": tool_name,
                "input": str(input_str)[:500],
            }
        )

    def on_tool_end(self, output: str, **kwargs: Any) -> None:
        """Attach the tool output to the most recent ``tool_call`` step."""
        if self.trajectory and self.trajectory[-1].get("action") == "tool_call":
            # Convert before slicing: the output is not guaranteed to be a
            # plain string, and slicing a non-sequence would raise.
            self.trajectory[-1]["output"] = str(output)[:500]
            self.trajectory[-1]["status"] = "success"

    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
        """Close the trajectory and submit it when the top-level chain ends.

        Nested runs are ignored so that only one evaluation is sent per
        top-level invocation. Exceptions raised by ``evaluate`` propagate to
        the caller of this method.
        """
        if kwargs.get("parent_run_id") is not None:
            return
        self.trajectory.append(
            {
                "step": len(self.trajectory) + 1,
                "action": "chain_end",
                "output": str(outputs)[:500],
            }
        )
        self.last_result = self.client.evaluate(
            {
                "trajectory": self.trajectory,
                "sub_tasks": len(self.trajectory),
                "constraint_violations": 0,
                "recovery_attempts": 1,
                "context": "LangChain agent execution",
                "agent_id": "langchain-agent",
            }
        )
@@ -0,0 +1,28 @@
1
+ from pydantic import BaseModel
2
+ from typing import Optional
3
+ from enum import Enum
4
+
5
+
6
class Verdict(str, Enum):
    """Verdict label of an evaluation.

    Members are also ``str`` instances and compare equal to their values.
    """

    SAFE = "SAFE"
    UNSAFE = "UNSAFE"
9
+
10
+
11
# Numeric scores attached to an evaluation result. Both fields are required.
class Metrics(BaseModel):
    pei: float  # "pei" score as reported by the gateway
    irs: float  # "irs" score as reported by the gateway
14
+
15
+
16
# Parsed reply of an evaluation request.
class EvaluationResult(BaseModel):
    verdict: Verdict  # SAFE or UNSAFE
    metrics: Metrics  # numeric scores accompanying the verdict
    safe_halt: Optional[bool] = False  # defaults to False when not supplied
    reason: Optional[str] = None  # optional free-text reason; None when not supplied
21
+
22
+
23
# One entry of a memory-retrieval response. All fields are required.
class MemoryMatch(BaseModel):
    id: str  # identifier of the match (field name shadows the builtin inside the class body only)
    trajectory_summary: str  # textual summary of the matched trajectory
    pei_score: float  # "pei" score of the match
    irs_score: float  # "irs" score of the match
    similarity: float  # similarity value reported for the match
@@ -0,0 +1,19 @@
1
+ import secrets
2
+ import time
3
+ import hmac
4
+ import hashlib
5
+
6
+
7
def generate_nonce(length: int = 12) -> str:
    """Return ``length`` cryptographically secure random bytes as hex text.

    The returned string is twice as long as ``length`` because every byte is
    rendered as two hexadecimal digits.
    """
    random_bytes = secrets.token_bytes(length)
    return random_bytes.hex()
10
+
11
+
12
def generate_timestamp() -> str:
    """Return the current Unix time, truncated to whole seconds, as a string."""
    whole_seconds = int(time.time())
    return f"{whole_seconds}"
15
+
16
+
17
def compute_hmac(secret_key: bytes, message: str) -> str:
    """Return the hex-encoded HMAC-SHA256 of ``message`` under ``secret_key``.

    ``message`` is encoded with the default string encoding before signing.
    """
    payload = message.encode()
    mac = hmac.digest(secret_key, payload, hashlib.sha256)
    return mac.hex()
@@ -0,0 +1,123 @@
1
+ Metadata-Version: 2.4
2
+ Name: hb-eval-sdk
3
+ Version: 2.0.0
4
+ Summary: HB-Eval SDK for reliable agent evaluation, semantic memory, and LangChain/LangGraph integration
5
+ Author-email: Abuelgasim Mohamed Ibrahim Adam <abuelgasim.hbeval@outlook.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/hb-evalSystem/HB-System
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
13
+ Requires-Python: >=3.8
14
+ Description-Content-Type: text/markdown
15
+ License-File: LICENSE
16
+ Requires-Dist: requests>=2.28.0
17
+ Requires-Dist: cryptography>=41.0.0
18
+ Requires-Dist: pydantic>=2.0.0
19
+ Requires-Dist: langchain-core>=0.1.0
20
+ Requires-Dist: langgraph>=0.0.10
21
+ Dynamic: license-file
22
+
23
+ # HB-Eval SDK
24
+
25
+ Secure Python SDK for **HB-Eval Reliability OS** — the first cognitive runtime for evaluating and ensuring AI agent reliability.
26
+
27
+ ## Features
28
+
29
+ - AES-256-GCM encrypted payloads for all API communication
30
+ - HMAC-SHA256 request signing with replay-attack protection
31
+ - Safe Halt protocol — automatic failure handling on connection loss
32
+ - Semantic memory retrieval for agent context
33
+ - Native LangChain / LangGraph callback integration
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ pip install hb-eval-sdk
39
+ ```
40
+
41
+ ## Quick Start
42
+
43
+ ```python
44
+ from hb_eval_sdk import HBEvalClient
45
+
46
+ client = HBEvalClient(
47
+ api_key="your_api_key",
48
+ aes_key="your_base64_encoded_32byte_aes_key",
49
+ )
50
+
51
+ result = client.evaluate({
52
+ "trajectory": [{"step": 1, "action": "query_memory", "input": "..."}],
53
+ "sub_tasks": 1,
54
+ "constraint_violations": 0,
55
+ "recovery_attempts": 1,
56
+ "context": "my agent task",
57
+ "agent_id": "my-agent",
58
+ })
59
+
60
+ print(result.verdict) # Verdict.SAFE or Verdict.UNSAFE
61
+ print(result.metrics.pei) # float
62
+ print(result.metrics.irs) # float
63
+ ```
64
+
65
+ ## Memory Retrieval
66
+
67
+ ```python
68
+ matches = client.retrieve_memory(context="my task context")
69
+ for match in matches:
70
+ print(match.trajectory_summary, match.similarity)
71
+ ```
72
+
73
+ ## LangChain Integration
74
+
75
+ ```python
76
+ from hb_eval_sdk import HBEvalCallback
77
+
78
+ callback = HBEvalCallback(
79
+ api_key="your_api_key",
80
+ aes_key="your_base64_encoded_32byte_aes_key",
81
+ )
82
+
83
+ # Pass the callback to your LangChain agent
84
+ agent.invoke({"input": "..."}, config={"callbacks": [callback]})
85
+ ```
86
+
87
+ ## Exception Handling
88
+
89
+ ```python
90
+ from hb_eval_sdk import (
91
+ HBEvalError,
92
+ AuthenticationError,
93
+ EncryptionError,
94
+ SafeHaltError,
95
+ ConnectionError,
96
+ )
97
+
98
+ try:
99
+ result = client.evaluate({...})
100
+ except SafeHaltError as e:
101
+ print(f"Safe halt triggered: {e.reason}")
102
+ except AuthenticationError as e:
103
+ print(f"Auth failed: {e}")
104
+ except ConnectionError as e:
105
+ print(f"Connection error: {e}")
106
+ ```
107
+
108
+ ## Requirements
109
+
110
+ - Python >= 3.8
111
+ - requests >= 2.28.0
112
+ - cryptography >= 41.0.0
113
+ - pydantic >= 2.0.0
114
+ - langchain-core >= 0.1.0
115
+ - langgraph >= 0.0.10
116
+
117
+ ## Documentation
118
+
119
+ Full documentation: https://hb-eval.readthedocs.io
120
+
121
+ ## License
122
+
123
+ MIT License — Copyright (c) 2026 Abuelgasim Mohamed Ibrahim Adam
@@ -0,0 +1,16 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ hb_eval_sdk/__init__.py
5
+ hb_eval_sdk/client.py
6
+ hb_eval_sdk/config.py
7
+ hb_eval_sdk/exceptions.py
8
+ hb_eval_sdk/langchain_integration.py
9
+ hb_eval_sdk/models.py
10
+ hb_eval_sdk/utils.py
11
+ hb_eval_sdk.egg-info/PKG-INFO
12
+ hb_eval_sdk.egg-info/SOURCES.txt
13
+ hb_eval_sdk.egg-info/dependency_links.txt
14
+ hb_eval_sdk.egg-info/requires.txt
15
+ hb_eval_sdk.egg-info/top_level.txt
16
+ tests/test_import.py
@@ -0,0 +1,5 @@
1
+ requests>=2.28.0
2
+ cryptography>=41.0.0
3
+ pydantic>=2.0.0
4
+ langchain-core>=0.1.0
5
+ langgraph>=0.0.10
@@ -0,0 +1 @@
1
+ hb_eval_sdk
@@ -0,0 +1,29 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "hb-eval-sdk"
7
+ version = "2.0.0"
8
+ description = "HB-Eval SDK for reliable agent evaluation, semantic memory, and LangChain/LangGraph integration"
9
+ authors = [{name = "Abuelgasim Mohamed Ibrahim Adam", email = "abuelgasim.hbeval@outlook.com"}]
10
+ license = {text = "MIT"}
11
+ readme = "README.md"
12
+ requires-python = ">=3.8"
13
+ classifiers = [
14
+ "Programming Language :: Python :: 3",
15
+ "License :: OSI Approved :: MIT License",
16
+ "Operating System :: OS Independent",
17
+ "Intended Audience :: Developers",
18
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
19
+ ]
20
+ dependencies = [
21
+ "requests>=2.28.0",
22
+ "cryptography>=41.0.0",
23
+ "pydantic>=2.0.0",
24
+ "langchain-core>=0.1.0",
25
+ "langgraph>=0.0.10",
26
+ ]
27
+
28
+ [project.urls]
29
+ Homepage = "https://github.com/hb-evalSystem/HB-System"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,71 @@
1
+ import pytest
2
+ from hb_eval_sdk import (
3
+ HBEvalClient,
4
+ HBEvalCallback,
5
+ EvaluationResult,
6
+ MemoryMatch,
7
+ HBEvalError,
8
+ ConnectionError,
9
+ AuthenticationError,
10
+ EncryptionError,
11
+ SafeHaltError,
12
+ )
13
+ from hb_eval_sdk.models import Verdict, Metrics
14
+
15
+
16
def test_imports():
    """The four primary public names are importable and bound."""
    for exported in (HBEvalClient, HBEvalCallback, EvaluationResult, MemoryMatch):
        assert exported is not None
21
+
22
+
23
def test_exceptions_hierarchy():
    """Every SDK exception derives from ``HBEvalError``."""
    derived = (ConnectionError, AuthenticationError, EncryptionError, SafeHaltError)
    for exc_type in derived:
        assert issubclass(exc_type, HBEvalError)
28
+
29
+
30
def test_safe_halt_reason():
    """``SafeHaltError.reason`` has a default and accepts an override."""
    with_default = SafeHaltError("test message")
    with_override = SafeHaltError("test message", reason="CUSTOM_REASON")

    assert with_default.reason == "CONNECTION_LOST_MANDATORY_HALT"
    assert with_override.reason == "CUSTOM_REASON"
36
+
37
+
38
def test_evaluation_result_model():
    """``EvaluationResult`` keeps given values and fills optional defaults."""
    scores = Metrics(pei=0.95, irs=0.88)
    result = EvaluationResult(verdict=Verdict.SAFE, metrics=scores)

    assert result.verdict == Verdict.SAFE
    assert result.metrics.pei == 0.95
    assert result.metrics.irs == 0.88
    # Optional fields that were not supplied fall back to their defaults.
    assert result.safe_halt is False
    assert result.reason is None
48
+
49
+
50
def test_memory_match_model():
    """``MemoryMatch`` exposes the values it was constructed with."""
    fields = {
        "id": "abc-123",
        "trajectory_summary": "Agent searched database and returned results.",
        "pei_score": 0.91,
        "irs_score": 0.87,
        "similarity": 0.93,
    }
    match = MemoryMatch(**fields)

    assert match.id == "abc-123"
    assert match.similarity == 0.93
60
+
61
+
62
def test_client_invalid_aes_key():
    """A key that is not valid base64 is rejected at construction time."""
    malformed_key = "not-valid-base64!!!"
    with pytest.raises(EncryptionError):
        HBEvalClient(api_key="test-key", aes_key=malformed_key)
65
+
66
+
67
def test_client_wrong_key_length():
    """Valid base64 that decodes to fewer than 32 bytes is rejected."""
    import base64

    encoded = base64.b64encode(b"tooshort")
    short_key = encoded.decode()
    with pytest.raises(EncryptionError):
        HBEvalClient(api_key="test-key", aes_key=short_key)
+ HBEvalClient(api_key="test-key", aes_key=short_key)