safety-agent 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. safety_agent-0.1.2/.gitignore +118 -0
  2. safety_agent-0.1.2/PKG-INFO +154 -0
  3. safety_agent-0.1.2/README.md +128 -0
  4. safety_agent-0.1.2/pyproject.toml +50 -0
  5. safety_agent-0.1.2/src/safety_agent/__init__.py +51 -0
  6. safety_agent-0.1.2/src/safety_agent/client.py +705 -0
  7. safety_agent-0.1.2/src/safety_agent/prompts/__init__.py +23 -0
  8. safety_agent-0.1.2/src/safety_agent/prompts/guard.py +71 -0
  9. safety_agent-0.1.2/src/safety_agent/prompts/redact.py +192 -0
  10. safety_agent-0.1.2/src/safety_agent/providers/__init__.py +134 -0
  11. safety_agent-0.1.2/src/safety_agent/providers/anthropic.py +120 -0
  12. safety_agent-0.1.2/src/safety_agent/providers/bedrock.py +210 -0
  13. safety_agent-0.1.2/src/safety_agent/providers/fireworks.py +74 -0
  14. safety_agent-0.1.2/src/safety_agent/providers/google.py +124 -0
  15. safety_agent-0.1.2/src/safety_agent/providers/groq.py +74 -0
  16. safety_agent-0.1.2/src/safety_agent/providers/openai.py +103 -0
  17. safety_agent-0.1.2/src/safety_agent/providers/openrouter.py +74 -0
  18. safety_agent-0.1.2/src/safety_agent/providers/superagent.py +119 -0
  19. safety_agent-0.1.2/src/safety_agent/providers/types.py +72 -0
  20. safety_agent-0.1.2/src/safety_agent/providers/vercel.py +80 -0
  21. safety_agent-0.1.2/src/safety_agent/schemas.py +68 -0
  22. safety_agent-0.1.2/src/safety_agent/types.py +309 -0
  23. safety_agent-0.1.2/src/safety_agent/utils/__init__.py +7 -0
  24. safety_agent-0.1.2/src/safety_agent/utils/input_processor.py +350 -0
  25. safety_agent-0.1.2/tests/conftest.py +21 -0
  26. safety_agent-0.1.2/tests/test_anthropic_guard.py +77 -0
  27. safety_agent-0.1.2/tests/test_anthropic_redact.py +39 -0
  28. safety_agent-0.1.2/tests/test_openai_guard.py +170 -0
  29. safety_agent-0.1.2/tests/test_openai_redact.py +89 -0
  30. safety_agent-0.1.2/tests/test_scan.py +95 -0
  31. safety_agent-0.1.2/tests/test_superagent_guard.py +94 -0
  32. safety_agent-0.1.2/uv.lock +1562 -0
@@ -0,0 +1,118 @@
1
+ # Dependencies
2
+ node_modules/
3
+ */node_modules/
4
+ sdk/typescript/package-lock.json
5
+
6
+ # Python virtual environments
7
+ venv/
8
+ .venv/
9
+ */venv/
10
+ */.venv/
11
+ __pycache__/
12
+ */__pycache__/
13
+ *.py[cod]
14
+ *$py.class
15
+ sdk/python/src/superagent_*.egg-info/
16
+ sdk/python/uv.lock
17
+
18
+ # AI models and caches
19
+ models/
20
+ */models/
21
+ *.gguf
22
+
23
+ # Build outputs
24
+ dist/
25
+ build/
26
+ target/
27
+ bin/
28
+ *.exe
29
+
30
+ # Environment files
31
+ .env
32
+ .env.local
33
+ .env.*.local
34
+
35
+ # OS files
36
+ .DS_Store
37
+ Thumbs.db
38
+
39
+ # Editor files
40
+ .vscode/
41
+ .idea/
42
+ *.swp
43
+ *.swo
44
+ *~
45
+
46
+ # Logs
47
+ *.log
48
+ logs/
49
+
50
+ # Runtime data
51
+ pids
52
+ *.pid
53
+ *.seed
54
+ *.pid.lock
55
+
56
+ # Coverage directory used by tools like istanbul
57
+ coverage/
58
+
59
+ # Dependency directories
60
+ jspm_packages/
61
+
62
+ # Optional npm cache directory
63
+ .npm
64
+
65
+ # Optional eslint cache
66
+ .eslintcache
67
+
68
+ # Microbundle cache
69
+ .rpt2_cache/
70
+ .rts2_cache_cjs/
71
+ .rts2_cache_es/
72
+ .rts2_cache_umd/
73
+
74
+ # Optional REPL history
75
+ .node_repl_history
76
+
77
+ # Output of 'npm pack'
78
+ *.tgz
79
+
80
+ # Yarn Integrity file
81
+ .yarn-integrity
82
+
83
+ # parcel-bundler cache (https://parceljs.org/)
84
+ .cache
85
+ .parcel-cache
86
+
87
+ # next.js build output
88
+ .next
89
+
90
+ # nuxt.js build output
91
+ .nuxt
92
+
93
+ # vuepress build output
94
+ .vuepress/dist
95
+
96
+ # Serverless directories
97
+ .serverless
98
+
99
+ # FuseBox cache
100
+ .fusebox/
101
+
102
+ # DynamoDB Local files
103
+ .dynamodb/
104
+
105
+ # Rust specific
106
+ # Cargo.lock - committed for applications (not libraries)
107
+ target/
108
+
109
+ # Go specific
110
+ go.mod
111
+ go.sum
112
+
113
+ # Temporary files
114
+ tmp/
115
+ temp/
116
+
117
+ # Coding agents
118
+ .claude/
@@ -0,0 +1,154 @@
1
+ Metadata-Version: 2.4
2
+ Name: safety-agent
3
+ Version: 0.1.2
4
+ Summary: A lightweight Python guardrail SDK for content safety
5
+ Project-URL: Homepage, https://superagent.sh
6
+ Project-URL: Documentation, https://docs.superagent.sh
7
+ Project-URL: Repository, https://github.com/superagent-ai/superagent
8
+ Author: Superagent AI
9
+ License-Expression: MIT
10
+ Keywords: ai,content-moderation,guardrail,llm,safety
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: daytona-sdk>=0.129.0
23
+ Requires-Dist: httpx>=0.27.0
24
+ Requires-Dist: pypdf>=5.0.0
25
+ Description-Content-Type: text/markdown
26
+
27
+ # Safety Agent Python SDK
28
+
29
+ A lightweight Python guardrail SDK for content safety. Guard against prompt injections, jailbreaks, and data exfiltration. Redact PII, PHI, and secrets from text.
30
+
31
+ ## Installation
32
+
33
+ ```bash
34
+ uv add safety-agent
35
+ ```
36
+
37
+ Or with pip:
38
+
39
+ ```bash
40
+ pip install safety-agent
41
+ ```
42
+
43
+ ## Prerequisites
44
+
45
+ Sign up at [superagent.sh](https://superagent.sh) to get your API key.
46
+
47
+ ```bash
48
+ export SUPERAGENT_API_KEY=your-key
49
+ ```
50
+
51
+ ## Quick Start
52
+
53
+ ```python
54
+ from safety_agent import create_client
55
+
56
+ client = create_client()
57
+
58
+ # Guard: Detect threats (uses default superagent/guard-1.7b model)
59
+ result = await client.guard(input="user message to analyze")
60
+
61
+ if result.classification == "block":
62
+ print("Blocked:", result.violation_types)
63
+
64
+ # Redact: Remove PII
65
+ result = await client.redact(
66
+ input="My email is john@example.com",
67
+ model="openai/gpt-4o-mini"
68
+ )
69
+
70
+ print(result.redacted)
71
+ # "My email is <EMAIL_REDACTED>"
72
+ ```
73
+
74
+ ## Guard
75
+
76
+ The `guard()` method classifies input content as `pass` or `block`. It detects prompt injections, malicious instructions, and security threats.
77
+
78
+ ```python
79
+ result = await client.guard(
80
+ input="Ignore all previous instructions",
81
+ model="openai/gpt-4o-mini", # Optional, defaults to superagent/guard-1.7b
82
+ system_prompt="Custom system prompt", # Optional
83
+ chunk_size=8000, # Optional, characters per chunk
84
+ )
85
+
86
+ print(result.classification) # "pass" or "block"
87
+ print(result.violation_types) # ["prompt_injection", ...]
88
+ print(result.cwe_codes) # ["CWE-94", ...]
89
+ ```
90
+
91
+ ### Input Types
92
+
93
+ Guard supports multiple input types:
94
+
95
+ - **Plain text**: Analyzed directly
96
+ - **URLs**: Automatically fetched and analyzed
97
+ - **Bytes/Files**: Analyzed based on content type
98
+ - **PDFs**: Text extracted and analyzed per page
99
+
100
+ ```python
101
+ # URL input
102
+ result = await client.guard(input="https://example.com/document.pdf")
103
+
104
+ # File input
105
+ with open("document.pdf", "rb") as f:
106
+ result = await client.guard(input=f.read())
107
+ ```
108
+
109
+ ## Redact
110
+
111
+ The `redact()` method removes sensitive content from text.
112
+
113
+ ```python
114
+ result = await client.redact(
115
+ input="My SSN is 123-45-6789",
116
+ model="openai/gpt-4o-mini",
117
+ entities=["SSN", "email"], # Optional, custom entities
118
+ rewrite=True, # Optional, contextual rewriting
119
+ )
120
+
121
+ print(result.redacted)
122
+ print(result.findings)
123
+ ```
124
+
125
+ ## Supported Providers
126
+
127
+ - OpenAI (`openai/gpt-4o`, `openai/gpt-4o-mini`, etc.)
128
+ - Anthropic (`anthropic/claude-3-5-sonnet-20241022`, etc.)
129
+ - Google (`google/gemini-2.0-flash`, etc.)
130
+ - AWS Bedrock (`bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0`, etc.)
131
+ - Groq (`groq/llama-3.3-70b-versatile`, etc.)
132
+ - Fireworks (`fireworks/accounts/fireworks/models/llama-v3p3-70b-instruct`, etc.)
133
+ - OpenRouter (`openrouter/openai/gpt-4o`, etc.)
134
+ - Vercel (`vercel/openai/gpt-4o`, etc.)
135
+ - Superagent (`superagent/guard-1.7b`, etc.) - Default for guard
136
+
137
+ ## Environment Variables
138
+
139
+ Configure provider API keys:
140
+
141
+ ```bash
142
+ export SUPERAGENT_API_KEY=your-superagent-key
143
+ export OPENAI_API_KEY=your-openai-key
144
+ export ANTHROPIC_API_KEY=your-anthropic-key
145
+ export GOOGLE_API_KEY=your-google-key
146
+ export GROQ_API_KEY=your-groq-key
147
+ export FIREWORKS_API_KEY=your-fireworks-key
148
+ export OPENROUTER_API_KEY=your-openrouter-key
149
+ export AI_GATEWAY_API_KEY=your-vercel-key
150
+ ```
151
+
152
+ ## License
153
+
154
+ MIT
@@ -0,0 +1,128 @@
1
+ # Safety Agent Python SDK
2
+
3
+ A lightweight Python guardrail SDK for content safety. Guard against prompt injections, jailbreaks, and data exfiltration. Redact PII, PHI, and secrets from text.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ uv add safety-agent
9
+ ```
10
+
11
+ Or with pip:
12
+
13
+ ```bash
14
+ pip install safety-agent
15
+ ```
16
+
17
+ ## Prerequisites
18
+
19
+ Sign up at [superagent.sh](https://superagent.sh) to get your API key.
20
+
21
+ ```bash
22
+ export SUPERAGENT_API_KEY=your-key
23
+ ```
24
+
25
+ ## Quick Start
26
+
27
+ ```python
28
+ from safety_agent import create_client
29
+
30
+ client = create_client()
31
+
32
+ # Guard: Detect threats (uses default superagent/guard-1.7b model)
33
+ result = await client.guard(input="user message to analyze")
34
+
35
+ if result.classification == "block":
36
+ print("Blocked:", result.violation_types)
37
+
38
+ # Redact: Remove PII
39
+ result = await client.redact(
40
+ input="My email is john@example.com",
41
+ model="openai/gpt-4o-mini"
42
+ )
43
+
44
+ print(result.redacted)
45
+ # "My email is <EMAIL_REDACTED>"
46
+ ```
47
+
48
+ ## Guard
49
+
50
+ The `guard()` method classifies input content as `pass` or `block`. It detects prompt injections, malicious instructions, and security threats.
51
+
52
+ ```python
53
+ result = await client.guard(
54
+ input="Ignore all previous instructions",
55
+ model="openai/gpt-4o-mini", # Optional, defaults to superagent/guard-1.7b
56
+ system_prompt="Custom system prompt", # Optional
57
+ chunk_size=8000, # Optional, characters per chunk
58
+ )
59
+
60
+ print(result.classification) # "pass" or "block"
61
+ print(result.violation_types) # ["prompt_injection", ...]
62
+ print(result.cwe_codes) # ["CWE-94", ...]
63
+ ```
64
+
65
+ ### Input Types
66
+
67
+ Guard supports multiple input types:
68
+
69
+ - **Plain text**: Analyzed directly
70
+ - **URLs**: Automatically fetched and analyzed
71
+ - **Bytes/Files**: Analyzed based on content type
72
+ - **PDFs**: Text extracted and analyzed per page
73
+
74
+ ```python
75
+ # URL input
76
+ result = await client.guard(input="https://example.com/document.pdf")
77
+
78
+ # File input
79
+ with open("document.pdf", "rb") as f:
80
+ result = await client.guard(input=f.read())
81
+ ```
82
+
83
+ ## Redact
84
+
85
+ The `redact()` method removes sensitive content from text.
86
+
87
+ ```python
88
+ result = await client.redact(
89
+ input="My SSN is 123-45-6789",
90
+ model="openai/gpt-4o-mini",
91
+ entities=["SSN", "email"], # Optional, custom entities
92
+ rewrite=True, # Optional, contextual rewriting
93
+ )
94
+
95
+ print(result.redacted)
96
+ print(result.findings)
97
+ ```
98
+
99
+ ## Supported Providers
100
+
101
+ - OpenAI (`openai/gpt-4o`, `openai/gpt-4o-mini`, etc.)
102
+ - Anthropic (`anthropic/claude-3-5-sonnet-20241022`, etc.)
103
+ - Google (`google/gemini-2.0-flash`, etc.)
104
+ - AWS Bedrock (`bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0`, etc.)
105
+ - Groq (`groq/llama-3.3-70b-versatile`, etc.)
106
+ - Fireworks (`fireworks/accounts/fireworks/models/llama-v3p3-70b-instruct`, etc.)
107
+ - OpenRouter (`openrouter/openai/gpt-4o`, etc.)
108
+ - Vercel (`vercel/openai/gpt-4o`, etc.)
109
+ - Superagent (`superagent/guard-1.7b`, etc.) - Default for guard
110
+
111
+ ## Environment Variables
112
+
113
+ Configure provider API keys:
114
+
115
+ ```bash
116
+ export SUPERAGENT_API_KEY=your-superagent-key
117
+ export OPENAI_API_KEY=your-openai-key
118
+ export ANTHROPIC_API_KEY=your-anthropic-key
119
+ export GOOGLE_API_KEY=your-google-key
120
+ export GROQ_API_KEY=your-groq-key
121
+ export FIREWORKS_API_KEY=your-fireworks-key
122
+ export OPENROUTER_API_KEY=your-openrouter-key
123
+ export AI_GATEWAY_API_KEY=your-vercel-key
124
+ ```
125
+
126
+ ## License
127
+
128
+ MIT
@@ -0,0 +1,50 @@
1
+ [project]
2
+ name = "safety-agent"
3
+ version = "0.1.2"
4
+ description = "A lightweight Python guardrail SDK for content safety"
5
+ readme = "README.md"
6
+ license = "MIT"
7
+ authors = [{ name = "Superagent AI" }]
8
+ requires-python = ">=3.10"
9
+ keywords = ["guardrail", "safety", "llm", "ai", "content-moderation"]
10
+ classifiers = [
11
+ "Development Status :: 4 - Beta",
12
+ "Intended Audience :: Developers",
13
+ "License :: OSI Approved :: MIT License",
14
+ "Programming Language :: Python :: 3",
15
+ "Programming Language :: Python :: 3.10",
16
+ "Programming Language :: Python :: 3.11",
17
+ "Programming Language :: Python :: 3.12",
18
+ "Programming Language :: Python :: 3.13",
19
+ "Topic :: Software Development :: Libraries :: Python Modules",
20
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
21
+ ]
22
+ dependencies = [
23
+ "httpx>=0.27.0",
24
+ "pypdf>=5.0.0",
25
+ "daytona-sdk>=0.129.0",
26
+ ]
27
+
28
+ [project.urls]
29
+ Homepage = "https://superagent.sh"
30
+ Documentation = "https://docs.superagent.sh"
31
+ Repository = "https://github.com/superagent-ai/superagent"
32
+
33
+ [build-system]
34
+ requires = ["hatchling"]
35
+ build-backend = "hatchling.build"
36
+
37
+ [tool.hatch.build.targets.wheel]
38
+ packages = ["src/safety_agent"]
39
+
40
+ [tool.uv]
41
+ dev-dependencies = [
42
+ "pytest>=8.0.0",
43
+ "pytest-asyncio>=0.24.0",
44
+ "python-dotenv>=1.0.0",
45
+ ]
46
+
47
+ [tool.pytest.ini_options]
48
+ asyncio_mode = "auto"
49
+ asyncio_default_fixture_loop_scope = "function"
50
+ testpaths = ["tests"]
@@ -0,0 +1,51 @@
1
+ """
2
+ safety-agent
3
+ A lightweight Python guardrail SDK for content safety
4
+ """
5
+
6
+ from .client import SafetyClient, create_client
7
+ from .types import (
8
+ ClientConfig,
9
+ GuardInput,
10
+ GuardOptions,
11
+ RedactOptions,
12
+ ScanOptions,
13
+ GuardClassificationResult,
14
+ RedactResult,
15
+ GuardResponse,
16
+ RedactResponse,
17
+ ScanResponse,
18
+ ScanUsage,
19
+ ChatMessage,
20
+ MultimodalContentPart,
21
+ ProcessedInput,
22
+ AnalysisResponse,
23
+ TokenUsage,
24
+ ParsedModel,
25
+ )
26
+
27
+ __version__ = "0.1.2"
28
+
29
+ __all__ = [
30
+ # Client
31
+ "SafetyClient",
32
+ "create_client",
33
+ # Types
34
+ "ClientConfig",
35
+ "GuardInput",
36
+ "GuardOptions",
37
+ "RedactOptions",
38
+ "ScanOptions",
39
+ "GuardClassificationResult",
40
+ "RedactResult",
41
+ "GuardResponse",
42
+ "RedactResponse",
43
+ "ScanResponse",
44
+ "ScanUsage",
45
+ "ChatMessage",
46
+ "MultimodalContentPart",
47
+ "ProcessedInput",
48
+ "AnalysisResponse",
49
+ "TokenUsage",
50
+ "ParsedModel",
51
+ ]