ClawGuard-PII 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.4
2
+ Name: ClawGuard-PII
3
+ Version: 0.1.0
4
+ Summary: Local PII redaction service for OpenClaw using nvidia/gliner-PII
5
+ License-Expression: Apache-2.0
6
+ Requires-Python: >=3.10
7
+ Requires-Dist: gliner>=0.2.0
8
+ Requires-Dist: fastapi>=0.111.0
9
+ Requires-Dist: uvicorn[standard]>=0.30.0
10
+ Provides-Extra: dev
11
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
12
+ Requires-Dist: httpx>=0.27.0; extra == "dev"
@@ -0,0 +1,57 @@
1
+ # ClawGuard
2
+
3
+ Local PII redaction server for [OpenClaw](https://openclaw.ai). Runs entirely on-device using [`nvidia/gliner-PII`](https://huggingface.co/nvidia/gliner-PII) — no data leaves your machine.
4
+
5
+ Designed for use with the [pii-redactor](https://clawhub.com/skills/pii-redactor) OpenClaw skill.
6
+
7
+ ## Requirements
8
+
9
+ - Python 3.10+
10
+
11
+ ## Install
12
+
13
+ ```bash
14
+ pip install ClawGuard-PII
15
+ ```
16
+
17
+ ## Quickstart
18
+
19
+ 1. Generate a token:
20
+
21
+ ```bash
22
+ python3 -c "import secrets; print(secrets.token_hex(32))"
23
+ ```
24
+
25
+ 2. Start the server:
26
+
27
+ ```bash
28
+ CLAWGUARD_TOKEN=<your-token> clawguard serve
29
+ ```
30
+
31
+ 3. Set env vars in your agent:
32
+
33
+ ```
34
+ CLAWGUARD_URL=http://localhost:8000
35
+ CLAWGUARD_TOKEN=<your-token>
36
+ ```
37
+ ## Supported Entity Types
38
+ `nvidia/gliner-PII` was fine-tuned to extract PII entities. By default, ClawGuard asks it to detect and redact the following entity types:
39
+
40
+ * email
41
+ * phone_number
42
+ * ssn
43
+ * credit_card_number
44
+ * bank_account_number
45
+ * ip_address
46
+ * password
47
+ * api_key
48
+ * user_name
49
+ * date_of_birth
50
+ * drivers_license_number
51
+ * passport_number
52
+ * address
53
+ * medical_record_number
54
+ * health_insurance_id
55
+
56
+
57
+ **License: Apache 2.0**
@@ -0,0 +1,30 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "ClawGuard-PII"
7
+ version = "0.1.0"
8
+ description = "Local PII redaction service for OpenClaw using nvidia/gliner-PII"
9
+ requires-python = ">=3.10"
10
+ license = "Apache-2.0"
11
+ dependencies = [
12
+ "gliner>=0.2.0",
13
+ "fastapi>=0.111.0",
14
+ "uvicorn[standard]>=0.30.0",
15
+ ]
16
+
17
+ [project.scripts]
18
+ clawguard = "clawguard.cli:serve"
19
+
20
+ [project.optional-dependencies]
21
+ dev = [
22
+ "pytest>=8.0.0",
23
+ "httpx>=0.27.0",
24
+ ]
25
+
26
+ [tool.setuptools.packages.find]
27
+ where = ["src"]
28
+
29
+ [tool.pytest.ini_options]
30
+ testpaths = ["tests"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.4
2
+ Name: ClawGuard-PII
3
+ Version: 0.1.0
4
+ Summary: Local PII redaction service for OpenClaw using nvidia/gliner-PII
5
+ License-Expression: Apache-2.0
6
+ Requires-Python: >=3.10
7
+ Requires-Dist: gliner>=0.2.0
8
+ Requires-Dist: fastapi>=0.111.0
9
+ Requires-Dist: uvicorn[standard]>=0.30.0
10
+ Provides-Extra: dev
11
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
12
+ Requires-Dist: httpx>=0.27.0; extra == "dev"
@@ -0,0 +1,14 @@
1
+ README.md
2
+ pyproject.toml
3
+ src/ClawGuard_PII.egg-info/PKG-INFO
4
+ src/ClawGuard_PII.egg-info/SOURCES.txt
5
+ src/ClawGuard_PII.egg-info/dependency_links.txt
6
+ src/ClawGuard_PII.egg-info/entry_points.txt
7
+ src/ClawGuard_PII.egg-info/requires.txt
8
+ src/ClawGuard_PII.egg-info/top_level.txt
9
+ src/clawguard/__init__.py
10
+ src/clawguard/cli.py
11
+ src/clawguard/redactor.py
12
+ src/clawguard/server.py
13
+ tests/test_redactor.py
14
+ tests/test_server.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ clawguard = clawguard.cli:serve
@@ -0,0 +1,7 @@
1
+ gliner>=0.2.0
2
+ fastapi>=0.111.0
3
+ uvicorn[standard]>=0.30.0
4
+
5
+ [dev]
6
+ pytest>=8.0.0
7
+ httpx>=0.27.0
@@ -0,0 +1,3 @@
1
+ from clawguard.redactor import PIIRedactor, RedactResult
2
+
3
+ __all__ = ["PIIRedactor", "RedactResult"]
@@ -0,0 +1,15 @@
1
+ """CLI entrypoint for ClawGuard."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+
7
+
8
def serve() -> None:
    """Run the ClawGuard HTTP server with uvicorn.

    Serves the ``clawguard.server:app`` application on ``127.0.0.1:8000``
    with auto-reload disabled.

    Raises:
        SystemExit: With status 1 when ``uvicorn`` cannot be imported.
    """
    # Imported lazily so a missing dependency produces a readable hint
    # instead of a traceback.
    try:
        import uvicorn
    except ImportError:
        # The distribution is published as "ClawGuard-PII"; "clawguard" is
        # only the import package / command name.
        print("uvicorn is not installed. Run: pip install ClawGuard-PII", file=sys.stderr)
        sys.exit(1)

    uvicorn.run("clawguard.server:app", host="127.0.0.1", port=8000, reload=False)
@@ -0,0 +1,90 @@
1
+ """PII detection and redaction using nvidia/gliner-PII."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+
7
+ from gliner import GLiNER
8
+
9
+
10
# Model identifier passed to GLiNER.from_pretrained when none is supplied.
DEFAULT_MODEL_ID = "nvidia/gliner-PII"
# Score threshold forwarded to the model when none is supplied.
DEFAULT_THRESHOLD = 0.5

# Entity labels requested from the model when the caller passes none.
DEFAULT_LABELS: list[str] = [
    "email",
    "phone_number",
    "ssn",
    "credit_card_number",
    "bank_account_number",
    "ip_address",
    "password",
    "api_key",
    "user_name",
    "date_of_birth",
    "drivers_license_number",
    "passport_number",
    "address",
    "medical_record_number",
    "health_insurance_id",
]
30
+
31
+
32
@dataclass
class RedactResult:
    """Outcome of one redaction pass over a piece of text."""

    # Text with each detected span replaced by its placeholder.
    redacted_text: str
    # Number of spans that were replaced.
    redacted_count: int
    # One dict per replaced span; PIIRedactor.redact fills in the keys
    # "original", "label", "replacement" and "confidence".
    redacted_items: list[dict] = field(default_factory=list)
37
+
38
+
39
+ def _resolve_overlaps(entities: list[dict]) -> list[dict]:
40
+ """Remove overlapping spans, keeping the highest-confidence entity per region."""
41
+ sorted_by_conf = sorted(entities, key=lambda e: e["score"], reverse=True)
42
+ kept: list[dict] = []
43
+ for ent in sorted_by_conf:
44
+ if any(ent["start"] < k["end"] and ent["end"] > k["start"] for k in kept):
45
+ continue
46
+ kept.append(ent)
47
+ return sorted(kept, key=lambda e: e["start"])
48
+
49
+
50
class PIIRedactor:
    """Detect PII spans with a GLiNER model and replace them with placeholders.

    Args:
        model_id: Identifier passed to ``GLiNER.from_pretrained``.
        threshold: Score threshold forwarded to ``predict_entities``.
        labels: Entity labels to request from the model. ``None`` or an
            empty list selects ``DEFAULT_LABELS``.
    """

    def __init__(
        self,
        model_id: str = DEFAULT_MODEL_ID,
        threshold: float = DEFAULT_THRESHOLD,
        labels: list[str] | None = None,
    ):
        self.threshold = threshold
        # Keep a private copy: aliasing DEFAULT_LABELS (or the caller's list)
        # would let a later mutation of self.labels leak into other redactors.
        self.labels = list(labels or DEFAULT_LABELS)
        self._model = GLiNER.from_pretrained(model_id)

    def redact(self, text: str) -> RedactResult:
        """Detect PII in text and replace each span with [LABEL_UPPER].

        Args:
            text: The text to scan.

        Returns:
            A ``RedactResult`` with the redacted text, the number of replaced
            spans, and one metadata dict per span in order of appearance.
        """
        # An empty string has nothing to redact; skip the model call.
        if not text:
            return RedactResult(redacted_text="", redacted_count=0)

        raw_entities = self._model.predict_entities(text, self.labels, threshold=self.threshold)
        entities = _resolve_overlaps(raw_entities)

        parts: list[str] = []
        cursor = 0  # index of the first character not yet copied to parts
        redacted_items: list[dict] = []

        for ent in entities:
            start, end = ent["start"], ent["end"]
            placeholder = f"[{ent['label'].upper()}]"
            # Copy the untouched text before the span, then the placeholder.
            parts.append(text[cursor:start])
            parts.append(placeholder)
            cursor = end
            redacted_items.append(
                {
                    "original": text[start:end],
                    "label": ent["label"],
                    "replacement": placeholder,
                    "confidence": ent["score"],
                }
            )

        # Trailing text after the last span.
        parts.append(text[cursor:])

        return RedactResult(
            redacted_text="".join(parts),
            redacted_count=len(entities),
            redacted_items=redacted_items,
        )
@@ -0,0 +1,102 @@
1
+ """FastAPI service exposing PII redaction for OpenClaw."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import hmac
7
+ import os
8
+ from contextlib import asynccontextmanager
9
+ from typing import Annotated
10
+
11
+ from fastapi import Depends, FastAPI, HTTPException, Query, Security
12
+ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
13
+ from pydantic import BaseModel
14
+
15
+ from clawguard.redactor import PIIRedactor, RedactResult
16
+
17
+
18
# Read the shared secret once at import time; importing this module fails
# unless a real token is configured.
_raw_token = os.environ.get("CLAWGUARD_TOKEN", "")
if not _raw_token or _raw_token == "change-me":
    raise RuntimeError(
        "CLAWGUARD_TOKEN environment variable is not set or uses the default placeholder. "
        "Generate a secret with: python3 -c \"import secrets; print(secrets.token_hex(32))\""
    )

# Bearer token that verify_token compares incoming credentials against.
API_TOKEN: str = _raw_token
# Model identifier handed to PIIRedactor; overridable via the MODEL_ID env var.
MODEL_ID: str = os.environ.get("MODEL_ID", "nvidia/gliner-PII")
# Detection threshold; float() raises ValueError at import if THRESHOLD is not numeric.
THRESHOLD: float = float(os.environ.get("THRESHOLD", "0.5"))
# Maximum accepted request text length, measured with len() on the str.
MAX_TEXT_LENGTH: int = int(os.environ.get("MAX_TEXT_LENGTH", "50000"))

# Security scheme that supplies the bearer credentials to verify_token.
security = HTTPBearer()
31
+
32
+
33
def verify_token(
    credentials: Annotated[HTTPAuthorizationCredentials, Security(security)],
) -> str:
    """Check the presented bearer token against ``API_TOKEN``.

    Args:
        credentials: Bearer credentials extracted by the ``security`` scheme.

    Returns:
        The presented token when it matches.

    Raises:
        HTTPException: 401 when the token does not match.
    """
    # hmac.compare_digest raises TypeError for str arguments that contain
    # non-ASCII characters, which would surface as a 500. Comparing bytes
    # keeps the constant-time check and turns such tokens into a plain 401.
    # surrogateescape keeps the encoding total for environment values that
    # were decoded from undecodable bytes.
    supplied = credentials.credentials.encode("utf-8", "surrogateescape")
    expected = API_TOKEN.encode("utf-8", "surrogateescape")
    if not hmac.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="Invalid token")
    return credentials.credentials
39
+
40
+
41
# Request body for POST /redact.
class TextRequest(BaseModel):
    # Text to scan; the endpoint returns an empty result for "" and rejects
    # text longer than MAX_TEXT_LENGTH.
    text: str
43
+
44
+
45
# Metadata for one replaced span in a /redact response.
class RedactedItem(BaseModel):
    # Entity label as reported by the redactor.
    label: str
    # Placeholder text that was substituted for the span.
    replacement: str
    # Score the redactor reported for the span.
    confidence: float
    # Original text of the span; only filled in when the caller passes
    # include_original=true, otherwise None.
    original: str | None = None
50
+
51
+
52
# Response body for POST /redact.
class RedactResponse(BaseModel):
    # Input text with detected spans replaced by placeholders.
    redacted_text: str
    # Number of replaced spans.
    redacted_count: int
    # One entry per replaced span.
    redacted_items: list[RedactedItem]
56
+
57
+
58
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Create the redactor at startup and publish it on ``app.state``.

    Nothing is torn down after the ``yield``.
    """
    redactor = PIIRedactor(model_id=MODEL_ID, threshold=THRESHOLD)
    app.state.redactor = redactor
    yield
62
+
63
+
64
+ app = FastAPI(title="ClawGuard PII Redaction Service", version="0.1.0", lifespan=lifespan)
65
+
66
+
67
@app.get("/health")
async def health():
    # Unauthenticated health check: always reports a fixed "ok" status.
    status_report = {"status": "ok"}
    return status_report
70
+
71
+
72
@app.post("/redact", response_model=RedactResponse, dependencies=[Depends(verify_token)])
async def redact(
    req: TextRequest,
    include_original: Annotated[bool, Query()] = False,
) -> RedactResponse:
    # Redact PII from req.text. Empty text yields an empty result without
    # touching the model; text longer than MAX_TEXT_LENGTH is rejected with 413.
    text = req.text
    if not text:
        return RedactResponse(redacted_text="", redacted_count=0, redacted_items=[])

    if len(text) > MAX_TEXT_LENGTH:
        raise HTTPException(
            status_code=413,
            detail=f"Text exceeds maximum allowed length of {MAX_TEXT_LENGTH} characters.",
        )

    # The redactor call is synchronous; run it in a worker thread so the
    # event loop is not blocked.
    result: RedactResult = await asyncio.to_thread(app.state.redactor.redact, text)

    items: list[RedactedItem] = []
    for entry in result.redacted_items:
        items.append(
            RedactedItem(
                label=entry["label"],
                replacement=entry["replacement"],
                confidence=entry["confidence"],
                # The original text is only echoed back on explicit request.
                original=entry["original"] if include_original else None,
            )
        )

    return RedactResponse(
        redacted_text=result.redacted_text,
        redacted_count=result.redacted_count,
        redacted_items=items,
    )
@@ -0,0 +1,189 @@
1
+ """Tests for PIIRedactor."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from unittest.mock import MagicMock, patch
6
+
7
+ import pytest
8
+
9
+ from clawguard.redactor import (
10
+ DEFAULT_LABELS,
11
+ DEFAULT_THRESHOLD,
12
+ PIIRedactor,
13
+ RedactResult,
14
+ _resolve_overlaps,
15
+ )
16
+
17
+
18
def _make_redactor(predict_return: list[dict]) -> PIIRedactor:
    """Build a PIIRedactor whose model is a mock returning ``predict_return``."""
    fake_model = MagicMock()
    fake_model.predict_entities.return_value = predict_return
    with patch("clawguard.redactor.GLiNER") as gliner_cls:
        gliner_cls.from_pretrained.return_value = fake_model
        return PIIRedactor()
25
+
26
+
27
class TestDefaults:
    """Module-level defaults and default constructor arguments."""

    def test_default_labels_is_nonempty_list(self):
        assert isinstance(DEFAULT_LABELS, list)
        assert len(DEFAULT_LABELS) >= 1

    def test_loads_nvidia_model_by_default(self):
        with patch("clawguard.redactor.GLiNER") as gliner_cls:
            gliner_cls.from_pretrained.return_value = MagicMock()
            PIIRedactor()
            gliner_cls.from_pretrained.assert_called_once_with("nvidia/gliner-PII")

    def test_default_threshold_is_0_5(self):
        with patch("clawguard.redactor.GLiNER") as gliner_cls:
            gliner_cls.from_pretrained.return_value = MagicMock()
            instance = PIIRedactor()
        assert instance.threshold == 0.5

    def test_default_threshold_constant(self):
        assert DEFAULT_THRESHOLD == 0.5
46
+
47
+
48
class TestRedactCleanText:
    """Text with no detections must pass through untouched."""

    def test_clean_text_unchanged(self):
        outcome = _make_redactor([]).redact("What is the capital of France?")
        assert outcome.redacted_text == "What is the capital of France?"
        assert outcome.redacted_count == 0
        assert outcome.redacted_items == []

    def test_returns_redact_result(self):
        outcome = _make_redactor([]).redact("hello")
        assert isinstance(outcome, RedactResult)
60
+
61
+
62
class TestRedactSingleEntity:
    """A single detected span is replaced by its upper-cased label."""

    def test_email_redacted(self):
        detection = {"text": "john@example.com", "label": "email", "start": 12, "end": 28, "score": 0.99}
        outcome = _make_redactor([detection]).redact("Contact me: john@example.com")
        assert "john@example.com" not in outcome.redacted_text
        assert "[EMAIL]" in outcome.redacted_text
        assert outcome.redacted_count == 1

    def test_phone_redacted(self):
        detection = {"text": "555-123-4567", "label": "phone_number", "start": 9, "end": 21, "score": 0.97}
        outcome = _make_redactor([detection]).redact("Call me: 555-123-4567")
        assert "555-123-4567" not in outcome.redacted_text
        assert "[PHONE_NUMBER]" in outcome.redacted_text

    def test_ssn_redacted(self):
        detection = {"text": "123-45-6789", "label": "ssn", "start": 12, "end": 23, "score": 0.98}
        outcome = _make_redactor([detection]).redact("My SSN is: 123-45-6789")
        assert "123-45-6789" not in outcome.redacted_text
        assert "[SSN]" in outcome.redacted_text

    def test_label_uppercased(self):
        detection = {"text": "foo@bar.com", "label": "email", "start": 0, "end": 11, "score": 0.95}
        outcome = _make_redactor([detection]).redact("foo@bar.com")
        assert outcome.redacted_text == "[EMAIL]"
94
+
95
+
96
class TestRedactMultipleEntities:
    """Several non-overlapping spans are all replaced."""

    def test_two_entities_both_redacted(self):
        detections = [
            {"text": "john@example.com", "label": "email", "start": 0, "end": 16, "score": 0.99},
            {"text": "555-123-4567", "label": "phone_number", "start": 20, "end": 32, "score": 0.97},
        ]
        outcome = _make_redactor(detections).redact("john@example.com and 555-123-4567")
        redacted = outcome.redacted_text
        assert outcome.redacted_count == 2
        assert "[EMAIL]" in redacted
        assert "[PHONE_NUMBER]" in redacted
        assert "john@example.com" not in redacted
        assert "555-123-4567" not in redacted
108
+
109
+
110
class TestOverlapResolution:
    """Overlapping detections collapse to the highest-scoring span."""

    def test_higher_confidence_wins_on_overlap(self):
        detections = [
            {"text": "john", "label": "user_name", "start": 0, "end": 4, "score": 0.9},
            {"text": "john@example.com", "label": "email", "start": 0, "end": 16, "score": 0.99},
        ]
        outcome = _make_redactor(detections).redact("john@example.com")
        assert outcome.redacted_text == "[EMAIL]"
        assert outcome.redacted_count == 1

    def test_nested_span_discarded(self):
        detections = [
            {"text": "john@example.com", "label": "email", "start": 0, "end": 16, "score": 0.99},
            {"text": "john", "label": "user_name", "start": 0, "end": 4, "score": 0.5},
        ]
        outcome = _make_redactor(detections).redact("john@example.com")
        assert outcome.redacted_text == "[EMAIL]"
        assert outcome.redacted_count == 1

    def test_non_overlapping_spans_both_kept(self):
        detections = [
            {"text": "john@example.com", "label": "email", "start": 0, "end": 16, "score": 0.99},
            {"text": "555-123-4567", "label": "phone_number", "start": 20, "end": 32, "score": 0.97},
        ]
        outcome = _make_redactor(detections).redact("john@example.com and 555-123-4567")
        assert outcome.redacted_count == 2

    def test_resolve_overlaps_unit(self):
        survivors = _resolve_overlaps(
            [
                {"text": "john", "label": "user_name", "start": 0, "end": 4, "score": 0.9},
                {"text": "john@example.com", "label": "email", "start": 0, "end": 16, "score": 0.99},
                {"text": "555-123-4567", "label": "phone_number", "start": 20, "end": 32, "score": 0.97},
            ]
        )
        assert len(survivors) == 2
        first, second = survivors
        assert first["label"] == "email"
        assert second["label"] == "phone_number"
147
+
148
+
149
class TestRedactItems:
    """Per-span metadata reported alongside the redacted text."""

    def test_redacted_items_contain_metadata(self):
        detection = {"text": "john@example.com", "label": "email", "start": 0, "end": 16, "score": 0.99}
        outcome = _make_redactor([detection]).redact("john@example.com")
        assert len(outcome.redacted_items) == 1
        entry = outcome.redacted_items[0]
        assert entry["original"] == "john@example.com"
        assert entry["label"] == "email"
        assert entry["replacement"] == "[EMAIL]"
        assert entry["confidence"] == 0.99

    def test_redacted_items_ordered_by_appearance(self):
        # The detections are supplied out of order on purpose.
        detections = [
            {"text": "555-123-4567", "label": "phone_number", "start": 20, "end": 32, "score": 0.97},
            {"text": "john@example.com", "label": "email", "start": 0, "end": 16, "score": 0.99},
        ]
        outcome = _make_redactor(detections).redact("john@example.com and 555-123-4567")
        assert outcome.redacted_items[0]["label"] == "email"
        assert outcome.redacted_items[1]["label"] == "phone_number"
170
+
171
+
172
class TestRedactPassesConfig:
    """Constructor arguments must reach the underlying GLiNER calls."""

    def test_custom_threshold_passed_to_model(self):
        with patch("clawguard.redactor.GLiNER") as MockGLiNER:
            mock_model = MagicMock()
            MockGLiNER.from_pretrained.return_value = mock_model
            mock_model.predict_entities.return_value = []
            redactor = PIIRedactor(threshold=0.8)

        redactor.redact("test text")
        args, kwargs = mock_model.predict_entities.call_args
        # Accept the threshold as a keyword or as the third positional
        # argument. Check key presence rather than truthiness so a falsy
        # keyword value is not silently replaced by the positional fallback.
        if "threshold" in kwargs:
            threshold = kwargs["threshold"]
        else:
            threshold = args[2] if len(args) > 2 else None
        assert threshold == 0.8

    def test_custom_model_id_used(self):
        with patch("clawguard.redactor.GLiNER") as MockGLiNER:
            MockGLiNER.from_pretrained.return_value = MagicMock()
            PIIRedactor(model_id="custom/model")
            MockGLiNER.from_pretrained.assert_called_once_with("custom/model")
@@ -0,0 +1,207 @@
1
+ """Tests for the FastAPI server."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import importlib
6
+ from unittest.mock import MagicMock, patch
7
+
8
+ import pytest
9
+ from fastapi.testclient import TestClient
10
+
11
+ from clawguard.redactor import RedactResult
12
+
13
+
14
@pytest.fixture
def client(monkeypatch):
    """Yield a TestClient whose redactor is a mock returning one redacted email."""
    monkeypatch.setenv("CLAWGUARD_TOKEN", "test-token")

    mock_redactor = MagicMock()
    mock_redactor.redact.return_value = RedactResult(
        redacted_text="Contact [EMAIL]",
        redacted_count=1,
        redacted_items=[{
            "original": "john@example.com",
            "label": "email",
            "replacement": "[EMAIL]",
            "confidence": 0.99,
        }],
    )

    import clawguard.server as srv

    # Reload so the module re-reads the environment set above. The patch must
    # be applied AFTER the reload: reloading re-executes the module's
    # "from clawguard.redactor import PIIRedactor" and would rebind the real
    # class, discarding a patch installed beforehand.
    importlib.reload(srv)
    with patch.object(srv, "PIIRedactor", return_value=mock_redactor):
        # Entering TestClient runs the app lifespan, which instantiates the
        # (now mocked) PIIRedactor.
        with TestClient(srv.app) as tc:
            yield tc
35
+
36
+
37
@pytest.fixture
def clean_client(monkeypatch):
    """Client whose mock redactor returns no PII."""
    monkeypatch.setenv("CLAWGUARD_TOKEN", "test-token")

    mock_redactor = MagicMock()
    mock_redactor.redact.return_value = RedactResult(
        redacted_text="Hello world",
        redacted_count=0,
        redacted_items=[],
    )

    import clawguard.server as srv

    # Reload first (to re-read the environment), then patch: a reload
    # re-executes the module's imports and would overwrite a patch on
    # PIIRedactor that was installed before it.
    importlib.reload(srv)
    with patch.object(srv, "PIIRedactor", return_value=mock_redactor):
        # TestClient startup runs the lifespan, which builds the mocked redactor.
        with TestClient(srv.app) as tc:
            yield tc
54
+
55
+
56
class TestHealth:
    """The /health endpoint."""

    def test_health_returns_ok(self, client):
        response = client.get("/health")
        assert response.status_code == 200
        assert response.json()["status"] == "ok"

    def test_health_requires_no_auth(self, client):
        # No Authorization header is sent.
        response = client.get("/health")
        assert response.status_code == 200
65
+
66
+
67
class TestRedactEndpoint:
    """Happy-path and size-limit behavior of POST /redact."""

    # Shared request pieces used by most tests in this class.
    _AUTH = {"Authorization": "Bearer test-token"}
    _BODY = {"text": "Contact john@example.com"}

    def test_valid_request_returns_200(self, client):
        resp = client.post("/redact", json=self._BODY, headers=self._AUTH)
        assert resp.status_code == 200

    def test_response_shape(self, client):
        resp = client.post("/redact", json=self._BODY, headers=self._AUTH)
        data = resp.json()
        assert "redacted_text" in data
        assert "redacted_count" in data
        assert "redacted_items" in data

    def test_redacted_text_returned(self, client):
        resp = client.post("/redact", json=self._BODY, headers=self._AUTH)
        assert resp.json()["redacted_text"] == "Contact [EMAIL]"

    def test_redacted_count_returned(self, client):
        resp = client.post("/redact", json=self._BODY, headers=self._AUTH)
        assert resp.json()["redacted_count"] == 1

    def test_empty_text_short_circuits(self, clean_client):
        resp = clean_client.post("/redact", json={"text": ""}, headers=self._AUTH)
        assert resp.status_code == 200
        data = resp.json()
        assert data["redacted_text"] == ""
        assert data["redacted_count"] == 0
        assert data["redacted_items"] == []

    def test_oversized_text_rejected(self, monkeypatch):
        monkeypatch.setenv("CLAWGUARD_TOKEN", "test-token")
        monkeypatch.setenv("MAX_TEXT_LENGTH", "10")

        mock_redactor = MagicMock()
        mock_redactor.redact.return_value = RedactResult("", 0, [])

        import clawguard.server as srv

        # Reload so MAX_TEXT_LENGTH is re-read, and patch only afterwards:
        # the reload re-executes the module's imports and would rebind the
        # real PIIRedactor over a patch installed before it, so the lifespan
        # would build the real model at TestClient startup.
        importlib.reload(srv)
        with patch.object(srv, "PIIRedactor", return_value=mock_redactor):
            with TestClient(srv.app) as tc:
                resp = tc.post(
                    "/redact",
                    json={"text": "x" * 11},
                    headers=self._AUTH,
                )
        assert resp.status_code == 413
132
+
133
+
134
class TestOriginalField:
    """The ``original`` field is only populated on explicit request."""

    def test_original_omitted_by_default(self, client):
        response = client.post(
            "/redact",
            headers={"Authorization": "Bearer test-token"},
            json={"text": "Contact john@example.com"},
        )
        entries = response.json()["redacted_items"]
        assert len(entries) == 1
        assert entries[0].get("original") is None

    def test_original_included_when_requested(self, client):
        response = client.post(
            "/redact?include_original=true",
            headers={"Authorization": "Bearer test-token"},
            json={"text": "Contact john@example.com"},
        )
        entries = response.json()["redacted_items"]
        assert len(entries) == 1
        assert entries[0]["original"] == "john@example.com"
154
+
155
+
156
class TestAuth:
    """Bearer-token enforcement on POST /redact."""

    def test_missing_auth_rejected(self, client):
        response = client.post("/redact", json={"text": "hello"})
        assert response.status_code in (401, 403)

    def test_wrong_token_rejected(self, client):
        bad_headers = {"Authorization": "Bearer wrong-token"}
        response = client.post("/redact", json={"text": "hello"}, headers=bad_headers)
        assert response.status_code == 401

    def test_correct_token_accepted(self, client):
        good_headers = {"Authorization": "Bearer test-token"}
        response = client.post("/redact", json={"text": "hello"}, headers=good_headers)
        assert response.status_code == 200
176
+
177
+
178
class TestStartupGuard:
    """Importing the server module must fail without a real token."""

    def test_missing_token_raises_on_import(self, monkeypatch):
        monkeypatch.delenv("CLAWGUARD_TOKEN", raising=False)
        import clawguard.server as server_module

        with pytest.raises(RuntimeError, match="CLAWGUARD_TOKEN"):
            importlib.reload(server_module)

    def test_default_placeholder_raises_on_import(self, monkeypatch):
        monkeypatch.setenv("CLAWGUARD_TOKEN", "change-me")
        import clawguard.server as server_module

        with pytest.raises(RuntimeError, match="CLAWGUARD_TOKEN"):
            importlib.reload(server_module)
190
+
191
+
192
class TestValidation:
    """Request-body validation on POST /redact."""

    def test_missing_text_field_rejected(self, client):
        auth = {"Authorization": "Bearer test-token"}
        response = client.post("/redact", json={}, headers=auth)
        assert response.status_code == 422

    def test_empty_text_accepted(self, client):
        auth = {"Authorization": "Bearer test-token"}
        response = client.post("/redact", json={"text": ""}, headers=auth)
        assert response.status_code == 200