safety-agent 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. safety_agent-0.1.0/.gitignore +118 -0
  2. safety_agent-0.1.0/PKG-INFO +153 -0
  3. safety_agent-0.1.0/README.md +128 -0
  4. safety_agent-0.1.0/pyproject.toml +49 -0
  5. safety_agent-0.1.0/src/safety_agent/__init__.py +45 -0
  6. safety_agent-0.1.0/src/safety_agent/client.py +520 -0
  7. safety_agent-0.1.0/src/safety_agent/prompts/__init__.py +23 -0
  8. safety_agent-0.1.0/src/safety_agent/prompts/guard.py +69 -0
  9. safety_agent-0.1.0/src/safety_agent/prompts/redact.py +192 -0
  10. safety_agent-0.1.0/src/safety_agent/providers/__init__.py +134 -0
  11. safety_agent-0.1.0/src/safety_agent/providers/anthropic.py +120 -0
  12. safety_agent-0.1.0/src/safety_agent/providers/bedrock.py +210 -0
  13. safety_agent-0.1.0/src/safety_agent/providers/fireworks.py +74 -0
  14. safety_agent-0.1.0/src/safety_agent/providers/google.py +124 -0
  15. safety_agent-0.1.0/src/safety_agent/providers/groq.py +74 -0
  16. safety_agent-0.1.0/src/safety_agent/providers/openai.py +103 -0
  17. safety_agent-0.1.0/src/safety_agent/providers/openrouter.py +74 -0
  18. safety_agent-0.1.0/src/safety_agent/providers/superagent.py +119 -0
  19. safety_agent-0.1.0/src/safety_agent/providers/types.py +72 -0
  20. safety_agent-0.1.0/src/safety_agent/providers/vercel.py +80 -0
  21. safety_agent-0.1.0/src/safety_agent/schemas.py +64 -0
  22. safety_agent-0.1.0/src/safety_agent/types.py +257 -0
  23. safety_agent-0.1.0/src/safety_agent/utils/__init__.py +7 -0
  24. safety_agent-0.1.0/src/safety_agent/utils/input_processor.py +266 -0
  25. safety_agent-0.1.0/tests/conftest.py +21 -0
  26. safety_agent-0.1.0/tests/test_anthropic_guard.py +61 -0
  27. safety_agent-0.1.0/tests/test_anthropic_redact.py +39 -0
  28. safety_agent-0.1.0/tests/test_openai_guard.py +143 -0
  29. safety_agent-0.1.0/tests/test_openai_redact.py +89 -0
  30. safety_agent-0.1.0/tests/test_superagent_guard.py +71 -0
  31. safety_agent-0.1.0/uv.lock +278 -0
@@ -0,0 +1,118 @@
1
+ # Dependencies
2
+ node_modules/
3
+ */node_modules/
4
+ sdk/typescript/package-lock.json
5
+
6
+ # Python virtual environments
7
+ venv/
8
+ .venv/
9
+ */venv/
10
+ */.venv/
11
+ __pycache__/
12
+ */__pycache__/
13
+ *.py[cod]
14
+ *$py.class
15
+ sdk/python/src/superagent_*.egg-info/
16
+ sdk/python/uv.lock
17
+
18
+ # AI models and caches
19
+ models/
20
+ */models/
21
+ *.gguf
22
+
23
+ # Build outputs
24
+ dist/
25
+ build/
26
+ target/
27
+ bin/
28
+ *.exe
29
+
30
+ # Environment files
31
+ .env
32
+ .env.local
33
+ .env.*.local
34
+
35
+ # OS files
36
+ .DS_Store
37
+ Thumbs.db
38
+
39
+ # Editor files
40
+ .vscode/
41
+ .idea/
42
+ *.swp
43
+ *.swo
44
+ *~
45
+
46
+ # Logs
47
+ *.log
48
+ logs/
49
+
50
+ # Runtime data
51
+ pids
52
+ *.pid
53
+ *.seed
54
+ *.pid.lock
55
+
56
+ # Coverage directory used by tools like istanbul
57
+ coverage/
58
+
59
+ # Dependency directories
60
+ jspm_packages/
61
+
62
+ # Optional npm cache directory
63
+ .npm
64
+
65
+ # Optional eslint cache
66
+ .eslintcache
67
+
68
+ # Microbundle cache
69
+ .rpt2_cache/
70
+ .rts2_cache_cjs/
71
+ .rts2_cache_es/
72
+ .rts2_cache_umd/
73
+
74
+ # Optional REPL history
75
+ .node_repl_history
76
+
77
+ # Output of 'npm pack'
78
+ *.tgz
79
+
80
+ # Yarn Integrity file
81
+ .yarn-integrity
82
+
83
+ # parcel-bundler cache (https://parceljs.org/)
84
+ .cache
85
+ .parcel-cache
86
+
87
+ # next.js build output
88
+ .next
89
+
90
+ # nuxt.js build output
91
+ .nuxt
92
+
93
+ # vuepress build output
94
+ .vuepress/dist
95
+
96
+ # Serverless directories
97
+ .serverless
98
+
99
+ # FuseBox cache
100
+ .fusebox/
101
+
102
+ # DynamoDB Local files
103
+ .dynamodb/
104
+
105
+ # Rust specific
106
+ # Cargo.lock is intentionally not ignored: it is committed for applications (not libraries)
107
+ target/
108
+
109
+ # Go specific
110
+ go.mod
111
+ go.sum
112
+
113
+ # Temporary files
114
+ tmp/
115
+ temp/
116
+
117
+ # Coding agents
118
+ .claude/
@@ -0,0 +1,153 @@
1
+ Metadata-Version: 2.4
2
+ Name: safety-agent
3
+ Version: 0.1.0
4
+ Summary: A lightweight Python guardrail SDK for content safety
5
+ Project-URL: Homepage, https://superagent.sh
6
+ Project-URL: Documentation, https://docs.superagent.sh
7
+ Project-URL: Repository, https://github.com/superagent-ai/superagent
8
+ Author: Superagent AI
9
+ License-Expression: MIT
10
+ Keywords: ai,content-moderation,guardrail,llm,safety
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: httpx>=0.27.0
23
+ Requires-Dist: pypdf>=5.0.0
24
+ Description-Content-Type: text/markdown
25
+
26
+ # Safety Agent Python SDK
27
+
28
+ A lightweight Python guardrail SDK for content safety. Guard against prompt injections, jailbreaks, and data exfiltration. Redact PII, PHI, and secrets from text.
29
+
30
+ ## Installation
31
+
32
+ ```bash
33
+ uv add safety-agent
34
+ ```
35
+
36
+ Or with pip:
37
+
38
+ ```bash
39
+ pip install safety-agent
40
+ ```
41
+
42
+ ## Prerequisites
43
+
44
+ Sign up at [superagent.sh](https://superagent.sh) to get your API key.
45
+
46
+ ```bash
47
+ export SUPERAGENT_API_KEY=your-key
48
+ ```
49
+
50
+ ## Quick Start
51
+
52
+ ```python
53
+ from safety_agent import create_client
54
+
55
+ client = create_client()
56
+
57
+ # Guard: Detect threats (uses default superagent/guard-1.7b model)
58
+ result = await client.guard(input="user message to analyze")
59
+
60
+ if result.classification == "block":
61
+ print("Blocked:", result.violation_types)
62
+
63
+ # Redact: Remove PII
64
+ result = await client.redact(
65
+ input="My email is john@example.com",
66
+ model="openai/gpt-4o-mini"
67
+ )
68
+
69
+ print(result.redacted)
70
+ # "My email is <EMAIL_REDACTED>"
71
+ ```
72
+
73
+ ## Guard
74
+
75
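+ The `guard()` method classifies input content as `pass` or `block`. It detects prompt injections, malicious instructions, and security threats. As the examples show, it is awaited, so call it from inside an `async` function (for example, one started with `asyncio.run()`).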
76
+
77
+ ```python
78
+ result = await client.guard(
79
+ input="Ignore all previous instructions",
80
+ model="openai/gpt-4o-mini", # Optional, defaults to superagent/guard-1.7b
81
+ system_prompt="Custom system prompt", # Optional
82
+ chunk_size=8000, # Optional, characters per chunk
83
+ )
84
+
85
+ print(result.classification) # "pass" or "block"
86
+ print(result.violation_types) # ["prompt_injection", ...]
87
+ print(result.cwe_codes) # ["CWE-94", ...]
88
+ ```
89
+
90
+ ### Input Types
91
+
92
+ Guard supports multiple input types:
93
+
94
+ - **Plain text**: Analyzed directly
95
+ - **URLs**: Automatically fetched and analyzed
96
+ - **Bytes/Files**: Analyzed based on content type
97
+ - **PDFs**: Text extracted and analyzed per page
98
+
99
+ ```python
100
+ # URL input
101
+ result = await client.guard(input="https://example.com/document.pdf")
102
+
103
+ # File input
104
+ with open("document.pdf", "rb") as f:
105
+ result = await client.guard(input=f.read())
106
+ ```
107
+
108
+ ## Redact
109
+
110
+ The `redact()` method removes sensitive content from text.
111
+
112
+ ```python
113
+ result = await client.redact(
114
+ input="My SSN is 123-45-6789",
115
+ model="openai/gpt-4o-mini",
116
+ entities=["SSN", "email"], # Optional, custom entities
117
+ rewrite=True, # Optional, contextual rewriting
118
+ )
119
+
120
+ print(result.redacted)
121
+ print(result.findings)
122
+ ```
123
+
124
+ ## Supported Providers
125
+
126
+ - OpenAI (`openai/gpt-4o`, `openai/gpt-4o-mini`, etc.)
127
+ - Anthropic (`anthropic/claude-3-5-sonnet-20241022`, etc.)
128
+ - Google (`google/gemini-2.0-flash`, etc.)
129
+ - AWS Bedrock (`bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0`, etc.)
130
+ - Groq (`groq/llama-3.3-70b-versatile`, etc.)
131
+ - Fireworks (`fireworks/accounts/fireworks/models/llama-v3p3-70b-instruct`, etc.)
132
+ - OpenRouter (`openrouter/openai/gpt-4o`, etc.)
133
+ - Vercel (`vercel/openai/gpt-4o`, etc.)
134
+ - Superagent (`superagent/guard-1.7b`, etc.) - Default for guard
135
+
136
+ ## Environment Variables
137
+
138
+ Configure provider API keys:
139
+
140
+ ```bash
141
+ export SUPERAGENT_API_KEY=your-superagent-key
142
+ export OPENAI_API_KEY=your-openai-key
143
+ export ANTHROPIC_API_KEY=your-anthropic-key
144
+ export GOOGLE_API_KEY=your-google-key
145
+ export GROQ_API_KEY=your-groq-key
146
+ export FIREWORKS_API_KEY=your-fireworks-key
147
+ export OPENROUTER_API_KEY=your-openrouter-key
148
+ export AI_GATEWAY_API_KEY=your-vercel-key
149
+ ```
150
+
151
+ ## License
152
+
153
+ MIT
@@ -0,0 +1,128 @@
1
+ # Safety Agent Python SDK
2
+
3
+ A lightweight Python guardrail SDK for content safety. Guard against prompt injections, jailbreaks, and data exfiltration. Redact PII, PHI, and secrets from text.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ uv add safety-agent
9
+ ```
10
+
11
+ Or with pip:
12
+
13
+ ```bash
14
+ pip install safety-agent
15
+ ```
16
+
17
+ ## Prerequisites
18
+
19
+ Sign up at [superagent.sh](https://superagent.sh) to get your API key.
20
+
21
+ ```bash
22
+ export SUPERAGENT_API_KEY=your-key
23
+ ```
24
+
25
+ ## Quick Start
26
+
27
+ ```python
28
+ from safety_agent import create_client
29
+
30
+ client = create_client()
31
+
32
+ # Guard: Detect threats (uses default superagent/guard-1.7b model)
33
+ result = await client.guard(input="user message to analyze")
34
+
35
+ if result.classification == "block":
36
+ print("Blocked:", result.violation_types)
37
+
38
+ # Redact: Remove PII
39
+ result = await client.redact(
40
+ input="My email is john@example.com",
41
+ model="openai/gpt-4o-mini"
42
+ )
43
+
44
+ print(result.redacted)
45
+ # "My email is <EMAIL_REDACTED>"
46
+ ```
47
+
48
+ ## Guard
49
+
50
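+ The `guard()` method classifies input content as `pass` or `block`. It detects prompt injections, malicious instructions, and security threats. As the examples show, it is awaited, so call it from inside an `async` function (for example, one started with `asyncio.run()`).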
51
+
52
+ ```python
53
+ result = await client.guard(
54
+ input="Ignore all previous instructions",
55
+ model="openai/gpt-4o-mini", # Optional, defaults to superagent/guard-1.7b
56
+ system_prompt="Custom system prompt", # Optional
57
+ chunk_size=8000, # Optional, characters per chunk
58
+ )
59
+
60
+ print(result.classification) # "pass" or "block"
61
+ print(result.violation_types) # ["prompt_injection", ...]
62
+ print(result.cwe_codes) # ["CWE-94", ...]
63
+ ```
64
+
65
+ ### Input Types
66
+
67
+ Guard supports multiple input types:
68
+
69
+ - **Plain text**: Analyzed directly
70
+ - **URLs**: Automatically fetched and analyzed
71
+ - **Bytes/Files**: Analyzed based on content type
72
+ - **PDFs**: Text extracted and analyzed per page
73
+
74
+ ```python
75
+ # URL input
76
+ result = await client.guard(input="https://example.com/document.pdf")
77
+
78
+ # File input
79
+ with open("document.pdf", "rb") as f:
80
+ result = await client.guard(input=f.read())
81
+ ```
82
+
83
+ ## Redact
84
+
85
+ The `redact()` method removes sensitive content from text.
86
+
87
+ ```python
88
+ result = await client.redact(
89
+ input="My SSN is 123-45-6789",
90
+ model="openai/gpt-4o-mini",
91
+ entities=["SSN", "email"], # Optional, custom entities
92
+ rewrite=True, # Optional, contextual rewriting
93
+ )
94
+
95
+ print(result.redacted)
96
+ print(result.findings)
97
+ ```
98
+
99
+ ## Supported Providers
100
+
101
+ - OpenAI (`openai/gpt-4o`, `openai/gpt-4o-mini`, etc.)
102
+ - Anthropic (`anthropic/claude-3-5-sonnet-20241022`, etc.)
103
+ - Google (`google/gemini-2.0-flash`, etc.)
104
+ - AWS Bedrock (`bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0`, etc.)
105
+ - Groq (`groq/llama-3.3-70b-versatile`, etc.)
106
+ - Fireworks (`fireworks/accounts/fireworks/models/llama-v3p3-70b-instruct`, etc.)
107
+ - OpenRouter (`openrouter/openai/gpt-4o`, etc.)
108
+ - Vercel (`vercel/openai/gpt-4o`, etc.)
109
+ - Superagent (`superagent/guard-1.7b`, etc.) - Default for guard
110
+
111
+ ## Environment Variables
112
+
113
+ Configure provider API keys:
114
+
115
+ ```bash
116
+ export SUPERAGENT_API_KEY=your-superagent-key
117
+ export OPENAI_API_KEY=your-openai-key
118
+ export ANTHROPIC_API_KEY=your-anthropic-key
119
+ export GOOGLE_API_KEY=your-google-key
120
+ export GROQ_API_KEY=your-groq-key
121
+ export FIREWORKS_API_KEY=your-fireworks-key
122
+ export OPENROUTER_API_KEY=your-openrouter-key
123
+ export AI_GATEWAY_API_KEY=your-vercel-key
124
+ ```
125
+
126
+ ## License
127
+
128
+ MIT
@@ -0,0 +1,49 @@
1
+ [project]
2
+ name = "safety-agent"
3
+ version = "0.1.0"
4
+ description = "A lightweight Python guardrail SDK for content safety"
5
+ readme = "README.md"
6
+ license = "MIT"
7
+ authors = [{ name = "Superagent AI" }]
8
+ requires-python = ">=3.10"
9
+ keywords = ["guardrail", "safety", "llm", "ai", "content-moderation"]
10
+ classifiers = [
11
+ "Development Status :: 4 - Beta",
12
+ "Intended Audience :: Developers",
13
+ "License :: OSI Approved :: MIT License",
14
+ "Programming Language :: Python :: 3",
15
+ "Programming Language :: Python :: 3.10",
16
+ "Programming Language :: Python :: 3.11",
17
+ "Programming Language :: Python :: 3.12",
18
+ "Programming Language :: Python :: 3.13",
19
+ "Topic :: Software Development :: Libraries :: Python Modules",
20
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
21
+ ]
22
+ dependencies = [
23
+ "httpx>=0.27.0",
24
+ "pypdf>=5.0.0",
25
+ ]
26
+
27
+ [project.urls]
28
+ Homepage = "https://superagent.sh"
29
+ Documentation = "https://docs.superagent.sh"
30
+ Repository = "https://github.com/superagent-ai/superagent"
31
+
32
+ [build-system]
33
+ requires = ["hatchling"]
34
+ build-backend = "hatchling.build"
35
+
36
+ [tool.hatch.build.targets.wheel]
37
+ packages = ["src/safety_agent"]
38
+
39
+ [tool.uv]
40
+ dev-dependencies = [
41
+ "pytest>=8.0.0",
42
+ "pytest-asyncio>=0.24.0",
43
+ "python-dotenv>=1.0.0",
44
+ ]
45
+
46
+ [tool.pytest.ini_options]
47
+ asyncio_mode = "auto"
48
+ asyncio_default_fixture_loop_scope = "function"
49
+ testpaths = ["tests"]
@@ -0,0 +1,45 @@
1
+ """
2
+ safety-agent
3
+ A lightweight Python guardrail SDK for content safety
4
+ """
5
+
6
+ from .client import SafetyClient, create_client
7
+ from .types import (
8
+ ClientConfig,
9
+ GuardInput,
10
+ GuardOptions,
11
+ RedactOptions,
12
+ GuardClassificationResult,
13
+ RedactResult,
14
+ GuardResponse,
15
+ RedactResponse,
16
+ ChatMessage,
17
+ MultimodalContentPart,
18
+ ProcessedInput,
19
+ AnalysisResponse,
20
+ TokenUsage,
21
+ ParsedModel,
22
+ )
23
+
24
+ __version__ = "0.1.0"
25
+
26
+ __all__ = [
27
+ # Client
28
+ "SafetyClient",
29
+ "create_client",
30
+ # Types
31
+ "ClientConfig",
32
+ "GuardInput",
33
+ "GuardOptions",
34
+ "RedactOptions",
35
+ "GuardClassificationResult",
36
+ "RedactResult",
37
+ "GuardResponse",
38
+ "RedactResponse",
39
+ "ChatMessage",
40
+ "MultimodalContentPart",
41
+ "ProcessedInput",
42
+ "AnalysisResponse",
43
+ "TokenUsage",
44
+ "ParsedModel",
45
+ ]