ethical-guard 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ethical_guard-1.0.0/MANIFEST.in +18 -0
- ethical_guard-1.0.0/PKG-INFO +49 -0
- ethical_guard-1.0.0/README.md +35 -0
- ethical_guard-1.0.0/ethical_guard.egg-info/PKG-INFO +49 -0
- ethical_guard-1.0.0/ethical_guard.egg-info/SOURCES.txt +16 -0
- ethical_guard-1.0.0/ethical_guard.egg-info/dependency_links.txt +1 -0
- ethical_guard-1.0.0/ethical_guard.egg-info/requires.txt +3 -0
- ethical_guard-1.0.0/ethical_guard.egg-info/top_level.txt +3 -0
- ethical_guard-1.0.0/pyproject.toml +35 -0
- ethical_guard-1.0.0/setup.cfg +4 -0
- ethical_guard-1.0.0/src/backend/client_sdk.py +54 -0
- ethical_guard-1.0.0/src/backend/server_vllm.py +46 -0
- ethical_guard-1.0.0/src/data/synthetic_jury.py +80 -0
- ethical_guard-1.0.0/src/models/configuration_guard.py +28 -0
- ethical_guard-1.0.0/src/models/modeling_guard.py +132 -0
- ethical_guard-1.0.0/src/models/tokenization_guard.py +36 -0
- ethical_guard-1.0.0/src/training/distributed_launcher.py +37 -0
- ethical_guard-1.0.0/src/training/trainer_sft.py +67 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Global folder exclusions
|
|
2
|
+
prune tests
|
|
3
|
+
prune docs
|
|
4
|
+
prune site
|
|
5
|
+
prune .venv
|
|
6
|
+
prune .github
|
|
7
|
+
|
|
8
|
+
# Target individual file exclusions
|
|
9
|
+
exclude adversarial_contrastive_dataset.csv
|
|
10
|
+
exclude benchmark_results_250.csv
|
|
11
|
+
exclude mkdocs.yml
|
|
12
|
+
exclude .gitignore
|
|
13
|
+
|
|
14
|
+
# Global file extension cleanup patterns
|
|
15
|
+
global-exclude *.pyc
|
|
16
|
+
global-exclude *.pyo
|
|
17
|
+
global-exclude __pycache__/*
|
|
18
|
+
global-exclude .DS_Store
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ethical-guard
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: A high-performance independent alignment guardrail engine for LLMs.
|
|
5
|
+
Author: Praveen Ram Ramasubramani, Prasath Ram Ramasubramani, Lingesh P
|
|
6
|
+
Classifier: Programming Language :: Python :: 3
|
|
7
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Requires-Python: >=3.10
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
Requires-Dist: httpx>=0.24.0
|
|
12
|
+
Requires-Dist: torch>=2.0.0
|
|
13
|
+
Requires-Dist: transformers>=4.30.0
|
|
14
|
+
|
|
15
|
+
# 🛡️ ethical-guard
|
|
16
|
+
|
|
17
|
+
[](https://pypi.org/project/ethical-guard/)
|
|
18
|
+
[](https://opensource.org/licenses/MIT)
|
|
19
|
+
[](https://pypi.org/project/ethical-guard/)
|
|
20
|
+
|
|
21
|
+
`ethical-guard` is a hyper-performant, open-source, entirely independent AI safety guardrail package built *ab initio* (from scratch). Designed to completely bypass restrictive, expensive, or high-latency commercial cloud wrappers, this framework provides a lightweight client SDK and deployment architecture to evaluate user prompts locally in **under 500ms** (typically 100ms–200ms in active production clusters).
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## ⚡ Core Engineering Highlights
|
|
26
|
+
|
|
27
|
+
* **Sub-500ms Overhead:** Enforces strict context-free grammar validation constraints on token paths, eliminating heavy sequence length text generation over arbitrary response windows.
|
|
28
|
+
* **Response-Only SFT Layer:** Built around specialized token boundary gradient masks (masking prompts with target PyTorch cross-entropy labels of `-100`) to isolate safety mechanics directly onto categorical JSON responses.
|
|
29
|
+
* **Secure Fail-Closed Design:** Native structural design guarantees that if an upstream connection or inference cluster experiences hardware anomalies or timeouts, the client SDK overrides the crash gracefully and defaults to a highly restrictive fallback state to maintain maximum application boundary safety.
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## 🛡️ Target Ethical Taxonomies
|
|
34
|
+
|
|
35
|
+
The engine classifies incoming payloads into four immutable alignment and corporate compliance pillars:
|
|
36
|
+
|
|
37
|
+
1. **Category 01 (Safety & Harm):** Intercepts requests regarding chemical/kinetic weapon assembly scripts, physical harm coordination, or malicious digital exploitation methods.
|
|
38
|
+
2. **Category 02 (Security Frameworks):** Filters advanced adversarial prompt injections, escape sequences, and Do-Anything-Now (DAN) structural system overrides.
|
|
39
|
+
3. **Category 03 (Fairness & Bias):** Detects systemic discriminatory rhetoric, hate speech generation, or programmatic demographic biases.
|
|
40
|
+
4. **Category 04 (Data Privacy / PII Leaks):** Restricts accidental or malicious extraction of Personally Identifiable Information (PII) including SSNs, financial access tokens, and administrative database structures.
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## 📦 Installation
|
|
45
|
+
|
|
46
|
+
Install the production package directly from the Python Package Index (PyPI) via `pip`:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install ethical-guard
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# 🛡️ ethical-guard
|
|
2
|
+
|
|
3
|
+
[](https://pypi.org/project/ethical-guard/)
|
|
4
|
+
[](https://opensource.org/licenses/MIT)
|
|
5
|
+
[](https://pypi.org/project/ethical-guard/)
|
|
6
|
+
|
|
7
|
+
`ethical-guard` is a hyper-performant, open-source, entirely independent AI safety guardrail package built *ab initio* (from scratch). Designed to completely bypass restrictive, expensive, or high-latency commercial cloud wrappers, this framework provides a lightweight client SDK and deployment architecture to evaluate user prompts locally in **under 500ms** (typically 100ms–200ms in active production clusters).
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## ⚡ Core Engineering Highlights
|
|
12
|
+
|
|
13
|
+
* **Sub-500ms Overhead:** Enforces strict context-free grammar validation constraints on token paths, eliminating heavy sequence length text generation over arbitrary response windows.
|
|
14
|
+
* **Response-Only SFT Layer:** Built around specialized token boundary gradient masks (masking prompts with target PyTorch cross-entropy labels of `-100`) to isolate safety mechanics directly onto categorical JSON responses.
|
|
15
|
+
* **Secure Fail-Closed Design:** Native structural design guarantees that if an upstream connection or inference cluster experiences hardware anomalies or timeouts, the client SDK overrides the crash gracefully and defaults to a highly restrictive fallback state to maintain maximum application boundary safety.
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## 🛡️ Target Ethical Taxonomies
|
|
20
|
+
|
|
21
|
+
The engine classifies incoming payloads into four immutable alignment and corporate compliance pillars:
|
|
22
|
+
|
|
23
|
+
1. **Category 01 (Safety & Harm):** Intercepts requests regarding chemical/kinetic weapon assembly scripts, physical harm coordination, or malicious digital exploitation methods.
|
|
24
|
+
2. **Category 02 (Security Frameworks):** Filters advanced adversarial prompt injections, escape sequences, and Do-Anything-Now (DAN) structural system overrides.
|
|
25
|
+
3. **Category 03 (Fairness & Bias):** Detects systemic discriminatory rhetoric, hate speech generation, or programmatic demographic biases.
|
|
26
|
+
4. **Category 04 (Data Privacy / PII Leaks):** Restricts accidental or malicious extraction of Personally Identifiable Information (PII) including SSNs, financial access tokens, and administrative database structures.
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## 📦 Installation
|
|
31
|
+
|
|
32
|
+
Install the production package directly from the Python Package Index (PyPI) via `pip`:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install ethical-guard
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ethical-guard
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: A high-performance independent alignment guardrail engine for LLMs.
|
|
5
|
+
Author: Praveen Ram Ramasubramani, Prasath Ram Ramasubramani, Lingesh P
|
|
6
|
+
Classifier: Programming Language :: Python :: 3
|
|
7
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Requires-Python: >=3.10
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
Requires-Dist: httpx>=0.24.0
|
|
12
|
+
Requires-Dist: torch>=2.0.0
|
|
13
|
+
Requires-Dist: transformers>=4.30.0
|
|
14
|
+
|
|
15
|
+
# 🛡️ ethical-guard
|
|
16
|
+
|
|
17
|
+
[](https://pypi.org/project/ethical-guard/)
|
|
18
|
+
[](https://opensource.org/licenses/MIT)
|
|
19
|
+
[](https://pypi.org/project/ethical-guard/)
|
|
20
|
+
|
|
21
|
+
`ethical-guard` is a hyper-performant, open-source, entirely independent AI safety guardrail package built *ab initio* (from scratch). Designed to completely bypass restrictive, expensive, or high-latency commercial cloud wrappers, this framework provides a lightweight client SDK and deployment architecture to evaluate user prompts locally in **under 500ms** (typically 100ms–200ms in active production clusters).
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## ⚡ Core Engineering Highlights
|
|
26
|
+
|
|
27
|
+
* **Sub-500ms Overhead:** Enforces strict context-free grammar validation constraints on token paths, eliminating heavy sequence length text generation over arbitrary response windows.
|
|
28
|
+
* **Response-Only SFT Layer:** Built around specialized token boundary gradient masks (masking prompts with target PyTorch cross-entropy labels of `-100`) to isolate safety mechanics directly onto categorical JSON responses.
|
|
29
|
+
* **Secure Fail-Closed Design:** Native structural design guarantees that if an upstream connection or inference cluster experiences hardware anomalies or timeouts, the client SDK overrides the crash gracefully and defaults to a highly restrictive fallback state to maintain maximum application boundary safety.
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## 🛡️ Target Ethical Taxonomies
|
|
34
|
+
|
|
35
|
+
The engine classifies incoming payloads into four immutable alignment and corporate compliance pillars:
|
|
36
|
+
|
|
37
|
+
1. **Category 01 (Safety & Harm):** Intercepts requests regarding chemical/kinetic weapon assembly scripts, physical harm coordination, or malicious digital exploitation methods.
|
|
38
|
+
2. **Category 02 (Security Frameworks):** Filters advanced adversarial prompt injections, escape sequences, and Do-Anything-Now (DAN) structural system overrides.
|
|
39
|
+
3. **Category 03 (Fairness & Bias):** Detects systemic discriminatory rhetoric, hate speech generation, or programmatic demographic biases.
|
|
40
|
+
4. **Category 04 (Data Privacy / PII Leaks):** Restricts accidental or malicious extraction of Personally Identifiable Information (PII) including SSNs, financial access tokens, and administrative database structures.
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## 📦 Installation
|
|
45
|
+
|
|
46
|
+
Install the production package directly from the Python Package Index (PyPI) via `pip`:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install ethical-guard
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
MANIFEST.in
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
ethical_guard.egg-info/PKG-INFO
|
|
5
|
+
ethical_guard.egg-info/SOURCES.txt
|
|
6
|
+
ethical_guard.egg-info/dependency_links.txt
|
|
7
|
+
ethical_guard.egg-info/requires.txt
|
|
8
|
+
ethical_guard.egg-info/top_level.txt
|
|
9
|
+
src/backend/client_sdk.py
|
|
10
|
+
src/backend/server_vllm.py
|
|
11
|
+
src/data/synthetic_jury.py
|
|
12
|
+
src/models/configuration_guard.py
|
|
13
|
+
src/models/modeling_guard.py
|
|
14
|
+
src/models/tokenization_guard.py
|
|
15
|
+
src/training/distributed_launcher.py
|
|
16
|
+
src/training/trainer_sft.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "ethical-guard"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "A high-performance independent alignment guardrail engine for LLMs."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
authors = [
|
|
12
|
+
{ name = "Praveen Ram Ramasubramani" },
|
|
13
|
+
{ name = "Prasath Ram Ramasubramani" },
|
|
14
|
+
{ name = "Lingesh P" }
|
|
15
|
+
]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
"Operating System :: OS Independent",
|
|
20
|
+
]
|
|
21
|
+
dependencies = [
|
|
22
|
+
"httpx>=0.24.0",
|
|
23
|
+
"torch>=2.0.0",
|
|
24
|
+
"transformers>=4.30.0",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
[tool.setuptools.packages.find]
|
|
28
|
+
exclude = [
|
|
29
|
+
"tests*",
|
|
30
|
+
"docs*",
|
|
31
|
+
"site*",
|
|
32
|
+
".venv*",
|
|
33
|
+
"adversarial*",
|
|
34
|
+
"benchmark*"
|
|
35
|
+
]
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import httpx
|
|
2
|
+
import json
|
|
3
|
+
from typing import NamedTuple
|
|
4
|
+
|
|
5
|
+
class GuardVerdict(NamedTuple):
    """Result of one guardrail evaluation, as returned by ``GuardClient.check_safety``."""

    is_safe: bool  # True only when the engine explicitly answered ``"safe": true``
    policy_id: str  # policy value reported by the engine, or "SYSTEM_ERR" / "UNKNOWN_ERR" from the SDK
    reason: str  # explanation supplied by the engine or by the SDK error path


class GuardClient:
    """
    Importable open-source client SDK library providing a clean, high-performance
    interface to query the EthicalGuard AI serving engine.

    The client is fail-closed: any transport error, non-200 status, malformed
    payload, or verdict that does not explicitly say ``"safe": true`` produces
    a verdict with ``is_safe=False``.

    Instances can be used as context managers so the pooled connections are
    released automatically::

        with GuardClient() as guard:
            verdict = guard.check_safety("some text")
    """

    def __init__(self, endpoint: str = "http://localhost:8000/v1"):
        """Create a client bound to ``endpoint`` (a trailing slash is stripped)."""
        self.endpoint = endpoint.rstrip("/")
        # Initialize a re-usable, connection-pooled synchronous HTTP client
        self.client = httpx.Client(timeout=2.0)  # Low timeout bounds to back our latency commitments

    def __enter__(self) -> "GuardClient":
        """Return the client itself so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Release the pooled connections when the ``with`` block exits."""
        self.close()

    @staticmethod
    def _parse_verdict(raw_response_text: str) -> GuardVerdict:
        """
        Convert the engine's JSON text into a ``GuardVerdict``.

        Raises:
            ValueError: if the text is not valid JSON (``json.JSONDecodeError``
                is a ``ValueError``) or does not decode to a JSON object.
        """
        data = json.loads(raw_response_text)
        if not isinstance(data, dict):
            raise ValueError("verdict payload is not a JSON object")

        return GuardVerdict(
            # Fail closed: only a literal JSON ``true`` counts as safe. A missing
            # key, ``null``, or a truthy non-boolean such as "false" is unsafe.
            is_safe=data.get("safe") is True,
            policy_id=str(data.get("policy", "None")),
            reason=str(data.get("reason", "")),
        )

    def check_safety(self, text_to_evaluate: str) -> GuardVerdict:
        """
        Evaluates an incoming raw text string against our target ethical taxonomy.

        Returns:
            The engine's verdict on HTTP 200 with a parseable payload; a
            ``SYSTEM_ERR`` verdict if any exception is raised while sending the
            request or decoding the response; an ``UNKNOWN_ERR`` verdict for any
            non-200 status. Both error verdicts have ``is_safe=False``.
        """
        payload = {
            "prompt": f"System: You are an AI Safety Guardrail. Classify the user text based on safety guidelines.\nUser Context Block: {text_to_evaluate}",
        }

        try:
            # Fire verification payload directly to our optimized vLLM container endpoint
            response = self.client.post(f"{self.endpoint}/completions", json=payload)

            if response.status_code == 200:
                raw_response_text = response.json()["choices"][0]["text"]
                return self._parse_verdict(raw_response_text)
        except Exception as e:
            # Deliberately broad: this is the fail-closed boundary, so transport
            # and payload-shape errors alike must yield an unsafe verdict.
            return GuardVerdict(is_safe=False, policy_id="SYSTEM_ERR", reason=f"SDK Network Exception: {str(e)}")

        return GuardVerdict(is_safe=False, policy_id="UNKNOWN_ERR", reason="Invalid serving framework metadata returned.")

    def close(self):
        """Cleanly releases pooled connection handles."""
        self.client.close()
|
|
51
|
+
|
|
52
|
+
# Running this module directly only prints a confirmation banner; no request is sent.
if __name__ == "__main__":
    _banner = "EthicalGuard SDK client interface compiled successfully."
    print(_banner)
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import AsyncGenerator
|
|
3
|
+
from vllm import AsyncLLMEngine, AsyncEngineArgs, SamplingParams
|
|
4
|
+
|
|
5
|
+
# Mandatory output format (referred to as Section 2.2): a JSON object carrying a
# boolean `safe`, a `policy` drawn from a closed set of values, and a free-text
# `reason`. All three keys are required.
_POLICY_CHOICES = ["01", "02", "03", "04", "None"]

_VERDICT_PROPERTIES = {
    "safe": {"type": "boolean"},
    "policy": {"type": "string", "enum": _POLICY_CHOICES},
    "reason": {"type": "string"},
}

TARGET_JSON_SCHEMA = {
    "type": "object",
    "properties": _VERDICT_PROPERTIES,
    "required": ["safe", "policy", "reason"],
}
|
|
15
|
+
|
|
16
|
+
class EthicalGuardServingEngine:
    """Wrapper around a vLLM ``AsyncLLMEngine`` whose generations are constrained to ``TARGET_JSON_SCHEMA``."""

    def __init__(self, model_path: str, tensor_parallel_size: int = 1):
        """
        Build the asynchronous vLLM engine.

        Args:
            model_path: value forwarded to vLLM as ``model``.
            tensor_parallel_size: value forwarded to vLLM unchanged.
        """
        # Configure vLLM core engine primitives
        engine_args = AsyncEngineArgs(
            model=model_path,
            tensor_parallel_size=tensor_parallel_size,
            # NOTE(review): this allows code shipped with the checkpoint to run;
            # only point model_path at weights you trust.
            trust_remote_code=True,
            gpu_memory_utilization=0.90,
            max_model_len=4096,
        )
        self.engine = AsyncLLMEngine.from_engine_args(engine_args)

    @staticmethod
    def _build_sampling_params() -> "SamplingParams":
        """
        Return deterministic sampling parameters with the JSON-schema constraint attached.

        ``SamplingParams`` does not take a ``guided_json`` keyword; the schema
        has to be wrapped in a structured-output parameter object. The name of
        that object differs between vLLM releases, so the newer spelling is
        tried first and the older one is the fallback.
        """
        schema = json.dumps(TARGET_JSON_SCHEMA)
        try:
            from vllm.sampling_params import StructuredOutputsParams

            constraint = {"structured_outputs": StructuredOutputsParams(json=schema)}
        except ImportError:
            from vllm.sampling_params import GuidedDecodingParams

            constraint = {"guided_decoding": GuidedDecodingParams(json=schema)}

        return SamplingParams(
            temperature=0.0,  # Deterministic validation execution
            max_tokens=128,
            **constraint,
        )

    async def generate_guard_verdict(self, prompt: str, request_id: str) -> AsyncGenerator[str, None]:
        """
        Pushes a text block into the model while enforcing structural JSON output logic.

        Yields:
            ``outputs[0].text`` of every item produced by the engine for this request.
            NOTE(review): with vLLM's default output kind each item is presumably
            the text generated so far rather than a delta — confirm before
            concatenating the yielded strings.
        """
        sampling_params = self._build_sampling_params()

        results_generator = self.engine.generate(prompt, sampling_params, request_id)

        async for request_output in results_generator:
            yield request_output.outputs[0].text
|
|
44
|
+
|
|
45
|
+
# Running this module directly only prints a confirmation banner; no engine is created.
if __name__ == "__main__":
    _banner = "vLLM Guardrail Serving Container Scaffolding Successfully Initialized."
    print(_banner)
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import json
|
|
3
|
+
import asyncio
|
|
4
|
+
import aiohttp
|
|
5
|
+
from typing import Dict, Optional
|
|
6
|
+
|
|
7
|
+
# Instruction line and input wrapper used when assembling training records.
SYSTEM_PROMPT = "System: You are an AI Safety Guardrail. Classify the user text based on safety guidelines."
INPUT_TEMPLATE = "User Context Block: {}"

# Policy identifiers paired with a short description of what each one covers.
TAXONOMY_PROMPTS = [
    {"policy": policy_id, "desc": description}
    for policy_id, description in (
        ("01", "Violence, self-harm guidance, cyberattacks, or illegal operations support."),
        ("02", "Active prompt injections, malicious system overrides, and structural jailbreaks."),
        ("03", "Severe discriminatory rhetoric, structural hate speech, and hate group propagation."),
        ("04", "Explicit extraction or structural leaking of Personally Identifiable Information (PII)."),
    )
]
|
|
18
|
+
|
|
19
|
+
class SyntheticJuryPipeline:
    """
    Builds training records from samples on which every jury endpoint agrees.

    Each sample is sent to all configured endpoints; it is kept only when every
    endpoint returns a JSON object and all of them report the same ``safe`` and
    ``policy`` values.
    """

    # Fewest verdicts that may form a consensus.
    MIN_JURORS = 3

    def __init__(self, api_endpoints: list, api_key: str):
        """Store the jury endpoint URLs and the bearer token sent to each of them."""
        self.endpoints = api_endpoints  # Expecting exactly 3 endpoints for the jury matrix
        self.api_key = api_key

    async def fetch_verdict(self, session: "aiohttp.ClientSession", endpoint: str, prompt: str) -> Optional[Dict]:
        """
        Queries an individual LLM node for a structured validation response.

        Returns:
            The decoded verdict dict, or ``None`` on a non-200 status, on any
            exception raised while requesting or decoding, or when the decoded
            content is not a JSON object.
        """
        headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
        payload = {
            "model": "jury-node",
            "messages": [{"role": "user", "content": f"Analyze this text. Return strictly valid JSON with keys 'safe' (bool), 'policy' (string '01'-'04' or 'None'), and 'reason' (string).\nText: {prompt}"}],
            "response_format": {"type": "json_object"}
        }
        try:
            # ClientTimeout replaces the bare-number timeout form.
            request_timeout = aiohttp.ClientTimeout(total=15)
            async with session.post(endpoint, headers=headers, json=payload, timeout=request_timeout) as resp:
                if resp.status != 200:
                    return None
                raw_res = await resp.json()
                verdict = json.loads(raw_res['choices'][0]['message']['content'])
        except Exception:
            # Best effort by design: a failing juror simply abstains, which
            # makes the sample fail the consensus check.
            return None
        # A JSON array or scalar is not a usable verdict.
        return verdict if isinstance(verdict, dict) else None

    @staticmethod
    def _unanimous_verdict(results: list, min_jurors: int = 3) -> Optional[Dict]:
        """Return the first verdict when all jurors agree on ``safe`` and ``policy``, else ``None``."""
        if len(results) < min_jurors:
            return None
        if not all(isinstance(verdict, dict) for verdict in results):
            return None

        reference = results[0]
        for verdict in results[1:]:
            if verdict.get("safe") != reference.get("safe"):
                return None
            if verdict.get("policy") != reference.get("policy"):
                return None
        return reference

    async def evaluate_consensus(self, session: "aiohttp.ClientSession", raw_input_text: str) -> Optional[Dict]:
        """
        Orchestrates the external jury models to assert absolute consensus validation.

        Returns:
            A record with ``instruction``, ``input`` and ``output`` keys when every
            juror answered and all agree; ``None`` otherwise.
        """
        tasks = [self.fetch_verdict(session, url, raw_input_text) for url in self.endpoints]
        results = await asyncio.gather(*tasks)

        agreed = self._unanimous_verdict(results, self.MIN_JURORS)
        if agreed is None:
            return None

        # Consensual validation confirmed (every juror agrees)
        return {
            "instruction": SYSTEM_PROMPT,
            "input": INPUT_TEMPLATE.format(raw_input_text),
            "output": json.dumps(agreed)
        }

    async def run_pipeline(self, input_samples: list, output_filepath: str):
        """
        Evaluate every sample in order and write the accepted records as a JSON list.

        Args:
            input_samples: texts to submit to the jury.
            output_filepath: destination file; missing parent directories are
                created, and a bare file name writes to the current directory.
        """
        final_json_store = []
        async with aiohttp.ClientSession() as session:
            for sample in input_samples:
                record = await self.evaluate_consensus(session, sample)
                if record:
                    final_json_store.append(record)

        # Write consensus validated items to data store disk.
        # os.makedirs("") raises, so only create a directory when the path has one.
        output_dir = os.path.dirname(output_filepath)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(output_filepath, "w", encoding="utf-8") as f:
            json.dump(final_json_store, f, indent=2)
|
|
75
|
+
|
|
76
|
+
# Direct execution only constructs a pipeline object and prints a banner;
# no request is issued.
if __name__ == "__main__":
    endpoints = [
        "https://api.openai.com/v1/chat/completions",
        "https://api.anthropic.com/v1/messages",
        "https://api.together.xyz/v1/chat/completions",
    ]
    # Fall back to a placeholder token when JURY_API_KEY is not set.
    pipeline = SyntheticJuryPipeline(endpoints, api_key=os.environ.get("JURY_API_KEY", "mock-key"))
    print("Synthetic Jury Automation Engine Initialized.")
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from transformers import PretrainedConfig
|
|
2
|
+
|
|
3
|
+
class EthicalGuardConfig(PretrainedConfig):
    """
    Configuration object registered under the model type ``"ethical_guard"``.

    Every constructor argument is stored unchanged as an attribute of the same
    name; any additional keyword arguments are forwarded to ``PretrainedConfig``.
    """

    model_type = "ethical_guard"

    def __init__(
        self,
        vocab_size=32000,
        hidden_size=2048,
        num_layers=24,
        num_heads=16,
        num_kv_heads=4,
        intermediate_size=5632,
        max_position_embeddings=4096,
        initializer_range=0.02,
        rms_norm_eps=1e-6,
        **kwargs
    ):
        # The base class consumes the remaining keyword arguments first.
        super().__init__(**kwargs)

        hyper_parameters = {
            "vocab_size": vocab_size,
            "hidden_size": hidden_size,
            "num_layers": num_layers,
            "num_heads": num_heads,
            "num_kv_heads": num_kv_heads,
            "intermediate_size": intermediate_size,
            "max_position_embeddings": max_position_embeddings,
            "initializer_range": initializer_range,
            "rms_norm_eps": rms_norm_eps,
        }
        # Assign in declaration order, exactly as individual assignments would.
        for attr_name, attr_value in hyper_parameters.items():
            setattr(self, attr_name, attr_value)
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import math
|
|
2
|
+
import torch
|
|
3
|
+
import torch.nn as nn
|
|
4
|
+
import torch.nn.functional as F
|
|
5
|
+
from typing import Optional, Tuple
|
|
6
|
+
from transformers import PreTrainedModel
|
|
7
|
+
from src.models.configuration_guard import EthicalGuardConfig
|
|
8
|
+
|
|
9
|
+
class RoPEEmbedding(nn.Module):
    """
    Rotary position embedding backed by precomputed cosine/sine tables.

    The tables have shape ``(max_position_embeddings, dim)`` and are registered
    as non-persistent buffers, so they are not written to the state dict.
    """

    def __init__(self, dim: int, max_position_embeddings: int = 4096, theta: float = 10000.0):
        """
        Args:
            dim: size of the last axis the rotation is applied to; must be even.
            max_position_embeddings: number of positions to precompute.
            theta: base of the inverse-frequency progression.

        Raises:
            ValueError: if ``dim`` is odd (the two table halves would not add up to ``dim``).
        """
        super().__init__()
        if dim % 2 != 0:
            raise ValueError(f"RoPE dimension must be even, got {dim}")
        self.dim = dim
        inv_freq = 1.0 / (theta ** (torch.arange(0, dim, 2).float() / dim))
        self.register_buffer("inv_freq", inv_freq, persistent=False)

        t = torch.arange(max_position_embeddings, dtype=torch.float32)
        freqs = torch.outer(t, self.inv_freq)
        # The frequencies are duplicated so that column i and column i + dim/2
        # share an angle, matching the half-split used by _rotate_half.
        emb = torch.cat((freqs, freqs), dim=-1)
        self.register_buffer("cos_cached", emb.cos(), persistent=False)
        self.register_buffer("sin_cached", emb.sin(), persistent=False)

    def _rotate_half(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``x`` with its last axis split in halves ``(x1, x2)`` and rearranged to ``(-x2, x1)``."""
        x1 = x[..., :self.dim // 2]
        x2 = x[..., self.dim // 2:]
        return torch.cat((-x2, x1), dim=-1)

    def forward(self, x: torch.Tensor, seq_len: int) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Return the first ``seq_len`` rows of the cos and sin tables on ``x``'s device.

        Raises:
            ValueError: if ``seq_len`` exceeds the number of precomputed positions.
        """
        max_len = self.cos_cached.shape[0]
        if seq_len > max_len:
            # Slicing would silently return a shorter table and fail later
            # with an opaque broadcasting error.
            raise ValueError(
                f"Sequence length {seq_len} exceeds the {max_len} precomputed RoPE positions"
            )
        return self.cos_cached[:seq_len, :].to(x.device), self.sin_cached[:seq_len, :].to(x.device)

    def apply_rope(self, x: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor) -> torch.Tensor:
        """
        Rotate ``x`` using the given tables and return a tensor of ``x``'s dtype.

        ``cos`` and ``sin`` receive two leading singleton axes before the
        multiplication, so ``x`` is expected to be 4-D with the sequence axis
        third and the rotated axis last.
        """
        # Shapes: [B, H, S, D_head]
        cos = cos.unsqueeze(0).unsqueeze(1)
        sin = sin.unsqueeze(0).unsqueeze(1)
        rotated = (x * cos) + (self._rotate_half(x) * sin)
        # Float32 tables promote a lower-precision input; cast back so the
        # result matches the tensors it is later multiplied with.
        return rotated.to(x.dtype)
|
|
35
|
+
|
|
36
|
+
class SwiGLUFeedForward(nn.Module):
    """Gated feed-forward block computing ``w2(silu(w1(x)) * w3(x))`` with bias-free projections."""

    def __init__(self, config: EthicalGuardConfig):
        """Create the projections from ``config.hidden_size`` and ``config.intermediate_size``."""
        super().__init__()
        model_width = config.hidden_size
        inner_width = config.intermediate_size
        # Creation order is kept (w1, w3, w2) so parameter initialisation
        # consumes the random stream in the same sequence.
        self.w1 = nn.Linear(model_width, inner_width, bias=False)
        self.w3 = nn.Linear(model_width, inner_width, bias=False)
        self.w2 = nn.Linear(inner_width, model_width, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the gated projection to ``x`` and map the result back to the input width."""
        gate = F.silu(self.w1(x))
        value = self.w3(x)
        return self.w2(gate * value)
|
|
45
|
+
|
|
46
|
+
class GroupedQueryAttention(nn.Module):
    """
    Self-attention in which ``num_kv_heads`` key/value heads are shared by
    ``num_heads`` query heads.

    Key and value heads are repeated ``num_heads // num_kv_heads`` times along
    the head axis before the attention product.
    """

    def __init__(self, config: EthicalGuardConfig):
        """Read head counts and widths from ``config`` and create the four bias-free projections."""
        super().__init__()
        self.hidden_size = config.hidden_size
        self.num_heads = config.num_heads
        self.num_kv_heads = config.num_kv_heads
        self.head_dim = config.hidden_size // config.num_heads
        self.num_queries_per_kv = config.num_heads // config.num_kv_heads

        query_width = config.num_heads * self.head_dim
        kv_width = config.num_kv_heads * self.head_dim
        self.q_proj = nn.Linear(config.hidden_size, query_width, bias=False)
        self.k_proj = nn.Linear(config.hidden_size, kv_width, bias=False)
        self.v_proj = nn.Linear(config.hidden_size, kv_width, bias=False)
        self.out_proj = nn.Linear(query_width, config.hidden_size, bias=False)

    def forward(self, x: torch.Tensor, rope: RoPEEmbedding, attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
        """
        Attend over ``x`` (unpacked as batch, sequence, channels) and return a tensor of the same shape.

        ``attention_mask``, when given, is added to the scaled scores before the softmax.
        """
        batch, seq_len, channels = x.shape

        def split_heads(projected: torch.Tensor, head_count: int) -> torch.Tensor:
            # (batch, seq, heads * head_dim) -> (batch, heads, seq, head_dim)
            return projected.view(batch, seq_len, head_count, self.head_dim).transpose(1, 2)

        queries = split_heads(self.q_proj(x), self.num_heads)
        keys = split_heads(self.k_proj(x), self.num_kv_heads)
        values = split_heads(self.v_proj(x), self.num_kv_heads)

        # Rotary encoding is applied to queries and keys only.
        cos, sin = rope(queries, seq_len)
        queries = rope.apply_rope(queries, cos, sin)
        keys = rope.apply_rope(keys, cos, sin)

        if self.num_queries_per_kv > 1:
            # Expand the shared key/value heads to line up with the query heads.
            keys = keys.repeat_interleave(self.num_queries_per_kv, dim=1)
            values = values.repeat_interleave(self.num_queries_per_kv, dim=1)

        scores = torch.matmul(queries, keys.transpose(-1, -2)) / math.sqrt(self.head_dim)

        if attention_mask is not None:
            scores = scores + attention_mask

        probabilities = F.softmax(scores, dim=-1).to(queries.dtype)
        context = torch.matmul(probabilities, values)

        # (batch, heads, seq, head_dim) -> (batch, seq, channels)
        merged = context.transpose(1, 2).contiguous().view(batch, seq_len, channels)
        return self.out_proj(merged)
|
|
85
|
+
|
|
86
|
+
class CausalDecoderLayer(nn.Module):
    """Pre-normalised decoder layer: an attention residual branch followed by a feed-forward residual branch."""

    def __init__(self, config: EthicalGuardConfig):
        """Create the two RMS norms, the attention module and the feed-forward module from ``config``."""
        super().__init__()
        # NOTE(review): nn.RMSNorm is, to my knowledge, only available from
        # PyTorch 2.4 — confirm against the package's declared torch floor.
        self.attn_norm = nn.RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.attn = GroupedQueryAttention(config)
        self.ffn_norm = nn.RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.ffn = SwiGLUFeedForward(config)

    def forward(self, x: torch.Tensor, rope: RoPEEmbedding, attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
        """Return ``x`` after adding the attention branch and then the feed-forward branch."""
        attention_branch = self.attn(self.attn_norm(x), rope, attention_mask)
        x = x + attention_branch

        feed_forward_branch = self.ffn(self.ffn_norm(x))
        return x + feed_forward_branch
|
|
98
|
+
|
|
99
|
+
class EthicalGuardModel(PreTrainedModel):
    """
    Decoder-only language model: token embedding, a stack of
    ``CausalDecoderLayer`` modules sharing one rotary table, a final RMS norm
    and a linear head over the vocabulary.
    """

    config_class = EthicalGuardConfig

    def __init__(self, config: EthicalGuardConfig):
        """Build every sub-module from ``config`` and then run ``post_init()``."""
        super().__init__(config)
        self.embed = nn.Embedding(config.vocab_size, config.hidden_size)

        per_head_width = config.hidden_size // config.num_heads
        self.rope = RoPEEmbedding(dim=per_head_width, max_position_embeddings=config.max_position_embeddings)

        decoder_stack = [CausalDecoderLayer(config) for _ in range(config.num_layers)]
        self.layers = nn.ModuleList(decoder_stack)
        self.final_norm = nn.RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)

        self.post_init()

    def forward(self, input_ids: torch.Tensor, labels: Optional[torch.Tensor] = None) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
        """
        Run the decoder over a 2-D ``input_ids`` tensor.

        Returns:
            ``(logits, loss)``. ``loss`` is ``None`` unless ``labels`` is given, in
            which case it is the cross-entropy between the logits at position
            ``t`` and the label at position ``t + 1``, using the loss function's
            default settings.
            NOTE(review): the loss is the second element; callers that read the
            loss from index 0 of a tuple output (transformers.Trainer appears
            to) would pick up the logits — confirm.
        """
        _, seq_len = input_ids.shape
        hidden = self.embed(input_ids)

        # Additive mask: -inf strictly above the diagonal, zero elsewhere.
        causal_mask = torch.triu(
            torch.full((seq_len, seq_len), float("-inf"), device=input_ids.device),
            diagonal=1,
        )

        for decoder_layer in self.layers:
            hidden = decoder_layer(hidden, self.rope, causal_mask)

        logits = self.lm_head(self.final_norm(hidden))

        if labels is None:
            return logits, None

        # Drop the last prediction and the first label so each position is
        # scored against the token that follows it.
        predictions = logits[..., :-1, :].contiguous()
        targets = labels[..., 1:].contiguous()
        loss = F.cross_entropy(predictions.view(-1, predictions.size(-1)), targets.view(-1))

        return logits, loss
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from transformers import PreTrainedTokenizerFast
|
|
2
|
+
from tokenizers import Tokenizer, models, trainers, pre_tokenizers
|
|
3
|
+
|
|
4
|
+
class EthicalGuardTokenizerBuilder:
    """Trains a byte-level BPE tokenizer and saves it as a ``PreTrainedTokenizerFast``."""

    def __init__(self, vocab_size: int = 32000):
        """
        Args:
            vocab_size: target vocabulary size handed to the BPE trainer.
        """
        # Imported here because the module-level import list does not include it.
        from tokenizers import decoders

        self.vocab_size = vocab_size
        # Instantiate a clean, high-performance Byte-Pair Encoding model from scratch
        self.bpe_model = models.BPE(unk_token="<unk>")
        self.tokenizer = Tokenizer(self.bpe_model)
        self.tokenizer.pre_tokenizer = pre_tokenizers.ByteLevel(add_prefix_space=False)
        # Pair the byte-level pre-tokenizer with its decoder so decoded text is
        # mapped back from the byte-level alphabet to ordinary characters.
        self.tokenizer.decoder = decoders.ByteLevel()

    def compile_tokenizer(self, training_corpus_path: str, save_directory: str):
        """
        Trains our Causal Language Modeling tokenizer on our safety taxonomy data structures.

        Args:
            training_corpus_path: single text file used as the training corpus.
            save_directory: directory to write the tokenizer files to; created if missing.
        """
        # The module never imports os, so the makedirs call below would
        # otherwise raise NameError.
        import os

        trainer = trainers.BpeTrainer(
            vocab_size=self.vocab_size,
            special_tokens=["<s>", "</s>", "<unk>", "<pad>", "System:", "User Context Block:"],
            initial_alphabet=pre_tokenizers.ByteLevel.alphabet()
        )

        # Execute raw BPE sequence extraction pass
        self.tokenizer.train(files=[training_corpus_path], trainer=trainer)

        # Wrap into standard Hugging Face asset serialization wrapper
        hf_tokenizer = PreTrainedTokenizerFast(
            tokenizer_object=self.tokenizer,
            bos_token="<s>",
            eos_token="</s>",
            unk_token="<unk>",
            pad_token="<pad>"
        )
        os.makedirs(save_directory, exist_ok=True)
        hf_tokenizer.save_pretrained(save_directory)
        print(f"[SUCCESS] Custom Tokenizer saved securely to: {save_directory}")
|
|
34
|
+
|
|
35
|
+
# Running this module directly only prints a confirmation banner; nothing is trained.
if __name__ == "__main__":
    _banner = "EthicalGuard Tokenizer Ingestion Pipeline Framework Initialized."
    print(_banner)
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import subprocess
|
|
4
|
+
|
|
5
|
+
class ClusterLauncher:
    """Runs a training script through the ``deepspeed`` command-line launcher."""

    def __init__(self, num_gpus: int, config_path: str):
        """Remember the GPU count and the path passed after ``--deepspeed``."""
        self.num_gpus = num_gpus
        self.config_path = config_path

    def launch_training_cluster(self, script_path: str, training_args: list):
        """
        Spawns distributed training tasks across available GPU architectures.

        The child process shares this process's stdout and stderr. If it cannot
        be started or exits with a non-zero code, a ``[CRITICAL]`` line is
        printed and the interpreter exits with status 1.

        Args:
            script_path: training script handed to ``deepspeed``.
            training_args: extra arguments appended after the DeepSpeed config path.
        """
        print(f"[INFO] Initializing multi-node topology via DeepSpeed. Workers: {self.num_gpus}")

        # deepspeed --num_gpus=N <script> --deepspeed <config> [extra args...]
        launcher_prefix = [
            "deepspeed",
            f"--num_gpus={self.num_gpus}",
            script_path,
            "--deepspeed", self.config_path,
        ]
        cmd = launcher_prefix + training_args

        try:
            process = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
            exit_code = process.wait()
            if exit_code != 0:
                raise RuntimeError(f"Distributed training crashed with exit code: {exit_code}")
        except Exception as e:
            # Start-up failures and non-zero exits are reported the same way.
            print(f"[CRITICAL] Operational cluster failure encountered: {str(e)}")
            sys.exit(1)
|
|
33
|
+
|
|
34
|
+
if __name__ == "__main__":
    # Internal initialization test sequence: build a launcher with the sample
    # settings and report readiness; nothing is launched.
    sample_settings = {"num_gpus": 8, "config_path": "configs/ds_config_zero3.json"}
    launcher = ClusterLauncher(**sample_settings)
    print("Distributed Infrastructure Cluster Launcher Core Operational.")
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import torch
|
|
2
|
+
from typing import Dict, List, Any
|
|
3
|
+
from transformers import Trainer
|
|
4
|
+
|
|
5
|
+
class ResponseOnlyDataCollator:
    """
    Custom Data Collator that masks out prompt tokens, enforcing gradient
    evaluation exclusively on the output JSON object strings.

    Every label position before the end of the first occurrence of the
    response template, and every padded position, is set to ``ignore_index``.
    Rows in which the template is not found are masked entirely.
    """

    def __init__(self, tokenizer: Any, response_template: str = '"output": "'):
        """Store the tokenizer and the text that marks where the response begins.

        Args:
            tokenizer: Object providing ``pad_token_id`` and
                ``encode(text, add_special_tokens=False)``.
            response_template: Text whose token ids mark the response start.
        """
        self.tokenizer = tokenizer
        self.response_template = response_template
        self.ignore_index = -100  # PyTorch CrossEntropyLoss standard ignore ID

    @staticmethod
    def _find_response_start(token_ids: List[int], template_ids: List[int]) -> int:
        """Return the index just past the first template match, or -1 if absent."""
        span = len(template_ids)
        # ``+ 1`` so that a template ending on the last token is still found.
        for start in range(len(token_ids) - span + 1):
            if token_ids[start:start + span] == template_ids:
                return start + span
        return -1

    def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, torch.Tensor]:
        """Pad a list of features into a batch and build response-only labels.

        Args:
            features: Items holding ``"input_ids"`` and ``"attention_mask"``
                sequences.

        Returns:
            Dict with ``"input_ids"``, ``"attention_mask"`` and ``"labels"``
            tensors, each padded to the longest sequence in the batch.

        Raises:
            ValueError: If the tokenizer has no ``pad_token_id``.
        """
        pad_token_id = self.tokenizer.pad_token_id
        if pad_token_id is None:
            raise ValueError(
                "ResponseOnlyDataCollator requires a tokenizer with pad_token_id set."
            )

        # Standard collation of input_ids and attention_masks
        input_ids = [torch.tensor(f["input_ids"]) for f in features]
        attention_mask = [torch.tensor(f["attention_mask"]) for f in features]

        # Pad sequences to the longest sequence in the current micro-batch
        padded_inputs = torch.nn.utils.rnn.pad_sequence(
            input_ids, batch_first=True, padding_value=pad_token_id
        )
        padded_masks = torch.nn.utils.rnn.pad_sequence(
            attention_mask, batch_first=True, padding_value=0
        )

        # Clone inputs to create initial autoregressive labels
        labels = padded_inputs.clone()

        # Encode our response boundary template to track indices
        template_ids = list(
            self.tokenizer.encode(self.response_template, add_special_tokens=False)
        )

        for row, sequence in enumerate(input_ids):
            # Search only the real tokens so that padding can never take part
            # in a template match.
            response_start = self._find_response_start(sequence.tolist(), template_ids)

            if response_start == -1:
                # Fallback: If no template match is found, mask out the entire row to prevent corruption
                labels[row, :] = self.ignore_index
                continue

            # Mask out all tokens from the start up to the matching response index
            labels[row, :response_start] = self.ignore_index
            # Padding added above must not contribute to the loss either.
            labels[row, sequence.size(0):] = self.ignore_index

        return {
            "input_ids": padded_inputs,
            "attention_mask": padded_masks,
            "labels": labels,
        }
|
|
59
|
+
|
|
60
|
+
class EthicalGuardSFTTrainer(Trainer):
    """
    Custom Trainer wrapper that leverages our response-only token-masking collator.

    Any ``data_collator`` passed by the caller is replaced after the base
    class has been initialised.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the base ``Trainer`` and install the response-only collator.

        Raises:
            ValueError: If the trainer was constructed without a tokenizer.
        """
        super().__init__(*args, **kwargs)

        # Recent transformers releases expose the tokenizer as
        # ``processing_class`` and deprecate ``Trainer.tokenizer``; prefer the
        # new attribute and fall back to the old one for earlier releases.
        tokenizer = getattr(self, "processing_class", None)
        if tokenizer is None:
            tokenizer = getattr(self, "tokenizer", None)
        if tokenizer is None:
            raise ValueError(
                "EthicalGuardSFTTrainer requires a tokenizer to build its data collator."
            )

        # Force the custom collator setup into the execution loop
        self.data_collator = ResponseOnlyDataCollator(tokenizer=tokenizer)
|