contextduty 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ """ContextDuty — policy-driven context firewall for AI workflows."""
2
+
3
+ __version__ = "0.1.0"
contextduty/cli.py ADDED
@@ -0,0 +1,127 @@
1
+ """CLI entrypoint for ContextDuty."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ from .engine import redact_file, report_to_json, scan_file
11
+ from .policy import load_policy, unknown_detector_names, write_default_policy
12
+
13
+
14
def _parser() -> argparse.ArgumentParser:
    """Build the ``contextduty`` command-line parser.

    Sub-commands registered: ``init``, ``scan``, ``redact`` and
    ``policy validate``. A sub-command is mandatory.
    """
    from . import __version__

    default_policy = ".contextduty.json"

    root = argparse.ArgumentParser(
        prog="contextduty", description="Protect AI context with policy checks."
    )
    root.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
    commands = root.add_subparsers(dest="command", required=True)

    # init: write the default policy file.
    init_cmd = commands.add_parser("init", help="Create default policy file.")
    init_cmd.add_argument("--path", default=default_policy, help="Policy output path.")

    # scan: positional input file.
    scan_cmd = commands.add_parser("scan", help="Scan a text file for risky data.")
    scan_cmd.add_argument("target", help="Input file path.")

    # redact: explicit input and output files.
    redact_cmd = commands.add_parser("redact", help="Redact risky data from an input file.")
    redact_cmd.add_argument("--in", dest="input_path", required=True, help="Input file path.")
    redact_cmd.add_argument("--out", dest="output_path", required=True, help="Output file path.")

    # scan and redact share the same trailing options, registered in the same order.
    for cmd in (scan_cmd, redact_cmd):
        cmd.add_argument("--policy", default=default_policy, help="Policy path.")
        cmd.add_argument("--report", help="Optional report output JSON path.")

    # policy validate
    policy_cmd = commands.add_parser("policy", help="Policy operations.")
    policy_actions = policy_cmd.add_subparsers(dest="policy_command", required=True)
    validate_cmd = policy_actions.add_parser(
        "validate", help="Validate and resolve a policy file."
    )
    validate_cmd.add_argument("--policy", default=default_policy, help="Policy path.")
    validate_cmd.add_argument(
        "--strict",
        action="store_true",
        help="Fail validation when unknown detector names are present.",
    )

    return root
50
+
51
+
52
def _load_policy_with_fallback(policy_path: str) -> tuple[Path | None, object]:
    """Load the policy at *policy_path*, or the built-in default if it is absent.

    Returns ``(path, policy)`` when the file exists and ``(None, policy)``
    when the default policy was used instead.
    """
    candidate = Path(policy_path)
    if not candidate.exists():
        return None, load_policy(None)
    return candidate, load_policy(candidate)
57
+
58
+
59
def _emit_report(result, report_path: str | None, policy_ref: Path | None) -> None:
    """Print the JSON report, optionally save it, and exit 2 if the result is blocked."""
    report = report_to_json(result)
    print(report)
    if report_path:
        Path(report_path).write_text(report + "\n", encoding="utf-8")
        print(f"Saved report to {report_path}")
    if result.blocked:
        print(f"BLOCKED by policy ({policy_ref or 'default'})", file=sys.stderr)
        raise SystemExit(2)


def main() -> None:
    """Run the ``contextduty`` CLI.

    Exit status: 2 when a scan/redact result is blocked or strict validation
    finds unknown detector names; 1 when no known command was dispatched.
    """
    args = _parser().parse_args()
    command = args.command

    if command == "init":
        destination = Path(args.path)
        write_default_policy(destination)
        print(f"Created policy at {destination}")
        return

    if command == "scan":
        policy_ref, policy = _load_policy_with_fallback(args.policy)
        outcome = scan_file(Path(args.target), policy)
        _emit_report(outcome, args.report, policy_ref)
        return

    if command == "redact":
        policy_ref, policy = _load_policy_with_fallback(args.policy)
        outcome = redact_file(Path(args.input_path), Path(args.output_path), policy)
        _emit_report(outcome, args.report, policy_ref)
        return

    if command == "policy" and args.policy_command == "validate":
        policy_file = Path(args.policy)
        on_disk = policy_file.exists()
        policy = load_policy(policy_file if on_disk else None)
        source = str(policy_file) if on_disk else "default"

        if args.strict:
            unknown = unknown_detector_names(policy)
            if unknown:
                print(
                    f"Unknown detector names in strict mode: {', '.join(unknown)}",
                    file=sys.stderr,
                )
                raise SystemExit(2)

        summary = {
            "valid": True,
            "source": source,
            "mode": policy.mode,
            "detectors": sorted(policy.detectors),
            "custom_detectors": sorted(policy.custom_detectors.keys()),
        }
        print(json.dumps(summary, indent=2))
        return

    # Unknown command or unknown policy sub-command.
    raise SystemExit(1)
124
+
125
+
126
+ if __name__ == "__main__":
127
+ main()
@@ -0,0 +1,36 @@
1
+ """Built-in detectors for secrets and PII."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import re
7
+ from dataclasses import dataclass
8
+
9
+
10
@dataclass(frozen=True)
class Detector:
    """A named compiled regular expression that recognises one kind of sensitive value."""

    name: str
    pattern: re.Pattern[str]


# Built-in detectors, applied in this order.
DETECTORS: list[Detector] = [
    Detector(
        "email", re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
    ),
    Detector(
        "phone",
        re.compile(
            # `(?<!\w)` instead of a leading `\b`: a word boundary can never sit
            # in front of "+" or "(" after whitespace, so `\b` silently dropped
            # the leading "+" / "(" from the match and left it in redacted output.
            r"(?<!\w)(?:\+?1[-.\s]?)?(?:\(?\d{3}\)?[-.\s]?)\d{3}[-.\s]?\d{4}\b"
        ),
    ),
    Detector("api_key", re.compile(r"\b(?:sk|rk|pk)_[A-Za-z0-9_]{16,}\b")),
    Detector("aws_key", re.compile(r"\bAKIA[0-9A-Z]{16}\b")),
    Detector(
        "bearer_token",
        # No trailing `\b`: it forced the match to back off from "=" padding and
        # from tokens ending in "-", "/", "." etc., leaving the tail unredacted.
        re.compile(r"\bBearer\s+[A-Za-z0-9\-._~+/]+=*", re.IGNORECASE),
    ),
]
32
+
33
+
34
def stable_mask(detector_name: str, value: str) -> str:
    """Return a deterministic placeholder for *value*.

    The placeholder is ``<NAME_xxxxxxxxxx>`` where ``NAME`` is the upper-cased
    detector name and the suffix is the first 10 hex digits of the SHA-256 of
    the UTF-8 encoded value, so equal values always map to the same mask.
    """
    fingerprint = hashlib.sha256(value.encode("utf-8")).hexdigest()
    label = detector_name.upper()
    return "<" + label + "_" + fingerprint[:10] + ">"
contextduty/engine.py ADDED
@@ -0,0 +1,136 @@
1
+ """Scanning and redaction engine."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+ from typing import Iterable
10
+
11
+ from .detectors import DETECTORS, Detector, stable_mask
12
+ from .policy import Policy
13
+
14
+
15
@dataclass(frozen=True)
class Finding:
    """One detector match found in a line of text."""

    detector: str  # name of the detector whose pattern matched
    value: str  # the exact matched substring
19
+
20
+
21
@dataclass(frozen=True)
class ScanResult:
    """Aggregate outcome of a scan or redaction run."""

    findings_count: int  # total number of matches across all detectors
    detector_counts: dict[str, int]  # matches per detector name
    blocked: bool  # True when findings exist and the policy mode is "block"
26
+
27
+
28
def _active_detectors(policy: Policy) -> list[Detector]:
    """Return the detectors enabled by *policy*.

    Built-in detectors come first (in ``DETECTORS`` order), followed by the
    policy's custom detectors, each compiled from its regex string. Only
    names listed in ``policy.detectors`` are included.
    """
    enabled = policy.detectors
    selected = [builtin for builtin in DETECTORS if builtin.name in enabled]
    selected.extend(
        Detector(name=custom_name, pattern=re.compile(regex))
        for custom_name, regex in policy.custom_detectors.items()
        if custom_name in enabled
    )
    return selected
34
+
35
+
36
def _scan_line(line: str, detectors: Iterable[Detector]) -> list[Finding]:
    """Run every detector over *line* and collect all matches.

    Findings are grouped by detector (in the order given) and, within a
    detector, ordered by position in the line.
    """
    return [
        Finding(detector=detector.name, value=hit.group(0))
        for detector in detectors
        for hit in detector.pattern.finditer(line)
    ]
42
+
43
+
44
def scan_file(path: Path, policy: Policy) -> ScanResult:
    """Scan the text file at *path* line by line without modifying it.

    The file is decoded as UTF-8 with undecodable bytes replaced. The result
    is marked blocked when at least one finding exists and the policy mode
    is ``"block"``.
    """
    active = _active_detectors(policy)
    per_detector: dict[str, int] = {}
    total = 0

    with path.open("r", encoding="utf-8", errors="replace") as stream:
        for text_line in stream:
            for hit in _scan_line(text_line, active):
                total += 1
                per_detector[hit.detector] = per_detector.get(hit.detector, 0) + 1

    is_blocked = total > 0 and policy.mode == "block"
    return ScanResult(
        findings_count=total, detector_counts=per_detector, blocked=is_blocked
    )
58
+
59
+
60
def redact_file(input_path: Path, output_path: Path, policy: Policy) -> ScanResult:
    """Copy *input_path* to *output_path*, masking findings when the policy says so.

    The input is decoded as UTF-8 (undecodable bytes replaced) and processed
    line by line. Findings are always counted; matched values are replaced
    with ``stable_mask`` placeholders only when ``policy.mode`` is
    ``"redact"`` — in ``"warn"`` and ``"block"`` modes the lines are written
    unchanged. The result is marked blocked when findings exist and the mode
    is ``"block"``.

    Redacting a file in place (input and output are the same file) is
    supported: the input is read completely before the output is opened,
    because opening the output for writing truncates it.
    """
    detectors = _active_detectors(policy)
    detector_counts: dict[str, int] = {}
    redacting = policy.mode == "redact"

    def process(line: str) -> str:
        """Count the findings in *line* and return the (possibly masked) line."""
        updated = line
        for finding in _scan_line(line, detectors):
            detector_counts[finding.detector] = detector_counts.get(finding.detector, 0) + 1
            if redacting:
                updated = updated.replace(
                    finding.value, stable_mask(finding.detector, finding.value)
                )
        return updated

    try:
        in_place = output_path.exists() and input_path.samefile(output_path)
    except OSError:
        # A missing/unreadable input is reported by the open() call below.
        in_place = False

    if in_place:
        # Opening the output with "w" would empty the input before it is read.
        with input_path.open("r", encoding="utf-8", errors="replace") as source:
            lines = source.readlines()
        with output_path.open("w", encoding="utf-8") as target:
            target.writelines(process(line) for line in lines)
    else:
        with (
            input_path.open("r", encoding="utf-8", errors="replace") as source,
            output_path.open("w", encoding="utf-8") as target,
        ):
            for line in source:
                target.write(process(line))

    findings_count = sum(detector_counts.values())
    blocked = findings_count > 0 and policy.mode == "block"
    return ScanResult(
        findings_count=findings_count, detector_counts=detector_counts, blocked=blocked
    )
88
+
89
+
90
@dataclass(frozen=True)
class ScanTextResult:
    """Outcome of scanning an in-memory string: the scan summary plus the output text."""

    scan: ScanResult  # counts and blocked flag for the scanned text
    redacted_text: str  # masked text in "redact" mode, otherwise the input text
96
+
97
+
98
def scan_text(text: str, policy: Policy) -> ScanTextResult:
    """Scan (and, in ``"redact"`` mode, mask) a string held in memory.

    No file is read or written. The text is scanned line by line; in
    ``"redact"`` mode every matched value is replaced throughout the whole
    text with its ``stable_mask`` placeholder. In any other mode the returned
    ``redacted_text`` is the input unchanged. The scan is marked blocked when
    findings exist and the mode is ``"block"``.
    """
    active = _active_detectors(policy)
    masking = policy.mode == "redact"
    counts: dict[str, int] = {}
    total = 0
    output = text

    for chunk in text.splitlines(keepends=True):
        hits = _scan_line(chunk, active)
        total += len(hits)
        for hit in hits:
            counts[hit.detector] = counts.get(hit.detector, 0) + 1
            if masking:
                output = output.replace(hit.value, stable_mask(hit.detector, hit.value))

    summary = ScanResult(
        findings_count=total,
        detector_counts=counts,
        blocked=total > 0 and policy.mode == "block",
    )
    return ScanTextResult(scan=summary, redacted_text=output if masking else text)
128
+
129
+
130
def report_to_json(result: ScanResult) -> str:
    """Serialise *result* as a 2-space-indented JSON object.

    Keys, in order: ``findings_count``, ``detector_counts``, ``blocked``.
    """
    fields = ("findings_count", "detector_counts", "blocked")
    return json.dumps({field: getattr(result, field) for field in fields}, indent=2)
@@ -0,0 +1,222 @@
1
+ """Minimal MCP stdio server exposing ContextDuty as tools.
2
+
3
+ Implements:
4
+ - initialize
5
+ - tools/list
6
+ - tools/call
7
+
8
+ Spec: https://modelcontextprotocol.io/specification/2025-06-18/server/tools
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import sys
15
+ from pathlib import Path
16
+ from typing import Any
17
+
18
+ from .engine import redact_file, report_to_json, scan_file, scan_text
19
+ from .policy import load_policy
20
+
21
+ PROTOCOL_VERSION = "2025-06-18"
22
+
23
+
24
+ def _send(obj: dict[str, Any]) -> None:
25
+ sys.stdout.write(json.dumps(obj, separators=(",", ":")) + "\n")
26
+ sys.stdout.flush()
27
+
28
+
29
+ def _err(_id: Any, code: int, message: str) -> dict[str, Any]:
30
+ return {"jsonrpc": "2.0", "id": _id, "error": {"code": code, "message": message}}
31
+
32
+
33
+ def _ok(_id: Any, result: dict[str, Any]) -> dict[str, Any]:
34
+ return {"jsonrpc": "2.0", "id": _id, "result": result}
35
+
36
+
37
+ def _tool_result(
38
+ text: str, is_error: bool = False, structured: dict[str, Any] | None = None
39
+ ) -> dict[str, Any]:
40
+ payload: dict[str, Any] = {"content": [{"type": "text", "text": text}], "isError": is_error}
41
+ if structured is not None:
42
+ payload["structuredContent"] = structured
43
+ return payload
44
+
45
+
46
+ def _tools_list() -> list[dict[str, Any]]:
47
+ return [
48
+ {
49
+ "name": "contextduty_scan_text",
50
+ "title": "ContextDuty Scan Text",
51
+ "description": (
52
+ "Scan a raw text string for sensitive data (emails, API keys, tokens, etc.) "
53
+ "before it is sent to an LLM. Returns findings and the redacted version of the text." # noqa: E501
54
+ "Use this to check prompt content, log snippets, or any in-memory string."
55
+ ),
56
+ "inputSchema": {
57
+ "type": "object",
58
+ "properties": {
59
+ "text": {
60
+ "type": "string",
61
+ "description": "The text content to scan and redact.",
62
+ },
63
+ "policyPath": {
64
+ "type": "string",
65
+ "description": "Optional policy JSON path (.contextduty.json).",
66
+ },
67
+ },
68
+ "required": ["text"],
69
+ },
70
+ },
71
+ {
72
+ "name": "contextduty_scan",
73
+ "title": "ContextDuty Scan",
74
+ "description": "Scan a file for sensitive data based on ContextDuty policy.",
75
+ "inputSchema": {
76
+ "type": "object",
77
+ "properties": {
78
+ "path": {"type": "string", "description": "Path to input file to scan."},
79
+ "policyPath": {
80
+ "type": "string",
81
+ "description": "Optional policy JSON path (.contextduty.json).",
82
+ },
83
+ },
84
+ "required": ["path"],
85
+ },
86
+ },
87
+ {
88
+ "name": "contextduty_redact",
89
+ "title": "ContextDuty Redact",
90
+ "description": "Redact sensitive data from an input file into an output file based on ContextDuty policy.", # noqa: E501
91
+ "inputSchema": {
92
+ "type": "object",
93
+ "properties": {
94
+ "inputPath": {"type": "string", "description": "Path to input file."},
95
+ "outputPath": {"type": "string", "description": "Path to write redacted file."},
96
+ "policyPath": {
97
+ "type": "string",
98
+ "description": "Optional policy JSON path (.contextduty.json).",
99
+ },
100
+ },
101
+ "required": ["inputPath", "outputPath"],
102
+ },
103
+ },
104
+ ]
105
+
106
+
107
def _load_policy(policy_path: str | None):
    """Load the policy at *policy_path*; use the default policy when the
    path is empty/``None`` or does not exist on disk."""
    candidate = Path(policy_path) if policy_path else None
    if candidate is not None and candidate.exists():
        return load_policy(candidate)
    return load_policy(None)
114
+
115
+
116
+ def _handle_tools_call(params: dict[str, Any]) -> dict[str, Any]:
117
+ name = params.get("name")
118
+ args = params.get("arguments") or {}
119
+
120
+ if name == "contextduty_scan_text":
121
+ text = args.get("text")
122
+ if not isinstance(text, str):
123
+ raise ValueError("Missing required argument: text")
124
+ policy = _load_policy(args.get("policyPath"))
125
+ result = scan_text(text, policy)
126
+ structured = {
127
+ "findings_count": result.scan.findings_count,
128
+ "detector_counts": result.scan.detector_counts,
129
+ "blocked": result.scan.blocked,
130
+ "redacted_text": result.redacted_text,
131
+ }
132
+ report = json.dumps(structured, indent=2)
133
+ return _tool_result(report, is_error=result.scan.blocked, structured=structured)
134
+
135
+ if name == "contextduty_scan":
136
+ path = args.get("path")
137
+ if not isinstance(path, str) or not path:
138
+ raise ValueError("Missing required argument: path")
139
+ policy = _load_policy(args.get("policyPath"))
140
+ result = scan_file(Path(path), policy)
141
+ report = report_to_json(result)
142
+ structured = {
143
+ "findings_count": result.findings_count,
144
+ "detector_counts": result.detector_counts,
145
+ "blocked": result.blocked,
146
+ }
147
+ return _tool_result(report, is_error=False, structured=structured)
148
+
149
+ if name == "contextduty_redact":
150
+ input_path = args.get("inputPath")
151
+ output_path = args.get("outputPath")
152
+ if not isinstance(input_path, str) or not input_path:
153
+ raise ValueError("Missing required argument: inputPath")
154
+ if not isinstance(output_path, str) or not output_path:
155
+ raise ValueError("Missing required argument: outputPath")
156
+ policy = _load_policy(args.get("policyPath"))
157
+ result = redact_file(Path(input_path), Path(output_path), policy)
158
+ report = report_to_json(result)
159
+ structured = {
160
+ "findings_count": result.findings_count,
161
+ "detector_counts": result.detector_counts,
162
+ "blocked": result.blocked,
163
+ "output_path": output_path,
164
+ }
165
+ return _tool_result(report, is_error=False, structured=structured)
166
+
167
+ raise KeyError(f"Unknown tool: {name}")
168
+
169
+
170
def run_stdio() -> None:
    """Serve JSON-RPC requests read line by line from stdin until EOF.

    Handles ``initialize``, ``tools/list`` and ``tools/call``; any other
    method gets a ``-32601`` error. Lines that are blank, are not valid JSON,
    or decode to something other than a JSON object are ignored, as are
    messages without an ``id`` (notifications). An unknown tool name yields a
    ``-32602`` error; any other failure inside a tool is reported as a tool
    result with ``isError`` set.
    """
    for raw in sys.stdin:
        raw = raw.strip()
        if not raw:
            continue

        try:
            msg = json.loads(raw)
        except (ValueError, RecursionError):
            # Not JSON (or pathologically nested): nothing to reply to.
            continue

        # Valid JSON that is not an object (array, number, string, ...) has no
        # "id"/"method" to read; calling .get() on it used to raise
        # AttributeError outside any handler and terminate the server.
        if not isinstance(msg, dict):
            continue

        _id = msg.get("id")
        method = msg.get("method")
        params = msg.get("params") or {}

        # Notifications carry no id and must not be answered.
        if _id is None:
            continue

        try:
            if method == "initialize":
                result = {
                    "protocolVersion": PROTOCOL_VERSION,
                    "capabilities": {"tools": {"listChanged": False}},
                    "serverInfo": {"name": "contextduty", "version": "0.1.0"},
                }
                _send(_ok(_id, result))
                continue

            if method == "tools/list":
                _send(_ok(_id, {"tools": _tools_list()}))
                continue

            if method == "tools/call":
                try:
                    payload = _handle_tools_call(params)
                    _send(_ok(_id, payload))
                except KeyError as e:
                    # Unknown tool name -> protocol-level "invalid params".
                    _send(_err(_id, -32602, str(e)))
                except Exception as e:
                    # Tool execution failures are reported inside the result.
                    _send(_ok(_id, _tool_result(f"{type(e).__name__}: {e}", is_error=True)))
                continue

            _send(_err(_id, -32601, f"Method not found: {method}"))
        except Exception as e:
            _send(_err(_id, -32603, f"Server error: {type(e).__name__}: {e}"))
215
+
216
+
217
def main() -> None:
    """Entry point for the MCP server: serve requests over stdio until EOF."""
    run_stdio()
219
+
220
+
221
+ if __name__ == "__main__":
222
+ main()
contextduty/policy.py ADDED
@@ -0,0 +1,146 @@
1
+ """Simple policy loading for ContextDuty."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ from .detectors import DETECTORS
12
+
13
+
14
@dataclass(frozen=True)
class Policy:
    """A fully resolved and validated ContextDuty policy."""

    mode: str  # one of "redact", "warn", "block"
    detectors: set[str]  # names of all enabled detectors (built-in and custom)
    custom_detectors: dict[str, str]  # custom detector name -> regex source
19
+
20
+
21
# Policy used when no policy file is supplied; also what `init` writes to disk.
DEFAULT_POLICY = {
    "mode": "redact",
    "detectors": [
        "email",
        "phone",
        "api_key",
        "aws_key",
        "bearer_token",
    ],
    "custom_detectors": {},
}
26
+
27
+
28
def write_default_policy(path: Path) -> None:
    """Write ``DEFAULT_POLICY`` to *path* as indented JSON with a trailing newline."""
    serialized = json.dumps(DEFAULT_POLICY, indent=2)
    path.write_text(f"{serialized}\n", encoding="utf-8")
30
+
31
+
32
+ def _read_policy_config(path: Path) -> dict[str, Any]:
33
+ raw = json.loads(path.read_text(encoding="utf-8"))
34
+ if not isinstance(raw, dict):
35
+ raise ValueError(f"policy file must contain a JSON object: {path}")
36
+ return raw
37
+
38
+
39
+ def _normalize_extends(value: Any) -> list[str]:
40
+ if value is None:
41
+ return []
42
+ if isinstance(value, str):
43
+ return [value]
44
+ if isinstance(value, list) and all(isinstance(item, str) for item in value):
45
+ return value
46
+ raise ValueError("policy extends must be a string or list of strings")
47
+
48
+
49
+ def _merge_policy_configs(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
50
+ merged: dict[str, Any] = dict(base)
51
+
52
+ base_detectors = base.get("detectors", [])
53
+ override_detectors = override.get("detectors", [])
54
+ if not isinstance(base_detectors, list) or not all(
55
+ isinstance(name, str) for name in base_detectors
56
+ ):
57
+ raise ValueError("policy detectors must be a list of strings")
58
+ if not isinstance(override_detectors, list) or not all(
59
+ isinstance(name, str) for name in override_detectors
60
+ ):
61
+ raise ValueError("policy detectors must be a list of strings")
62
+ merged["detectors"] = list(dict.fromkeys(base_detectors + override_detectors))
63
+
64
+ base_custom = base.get("custom_detectors", {})
65
+ override_custom = override.get("custom_detectors", {})
66
+ if not isinstance(base_custom, dict) or not isinstance(override_custom, dict):
67
+ raise ValueError("policy custom_detectors must be an object of {name: regex}")
68
+ merged["custom_detectors"] = {**base_custom, **override_custom}
69
+
70
+ if "mode" in override:
71
+ merged["mode"] = override["mode"]
72
+
73
+ return merged
74
+
75
+
76
def _resolve_policy_layers(path: Path, seen: set[Path]) -> tuple[dict[str, Any], bool]:
    """Merge the policy file at *path* with everything it ``extends``.

    No defaults are applied here. Returns the merged config together with a
    flag telling whether this file or any ancestor declared ``detectors``.
    """
    resolved_path = path.resolve()
    if resolved_path in seen:
        raise ValueError(f"policy extends cycle detected at: {resolved_path}")
    seen.add(resolved_path)

    config = _read_policy_config(resolved_path)
    declares_detectors = "detectors" in config

    merged: dict[str, Any] = {}
    for parent_ref in _normalize_extends(config.get("extends")):
        # Parent references are relative to the file that names them.
        parent_path = (resolved_path.parent / parent_ref).resolve()
        parent_config, parent_declares = _resolve_policy_layers(parent_path, seen)
        declares_detectors = declares_detectors or parent_declares
        merged = _merge_policy_configs(merged, parent_config)

    local_config = dict(config)
    local_config.pop("extends", None)
    merged = _merge_policy_configs(merged, local_config)

    # Only the current ancestor chain counts as a cycle, so the same parent
    # may be reached again through a different branch.
    seen.remove(resolved_path)
    return merged, declares_detectors


def _resolve_policy_config(path: Path, seen: set[Path] | None = None) -> dict[str, Any]:
    """Resolve the policy file at *path*, following ``extends`` recursively.

    Parents are merged in the order listed, then the file's own settings are
    applied on top. Defaults from ``DEFAULT_POLICY`` are filled in once, after
    the whole chain has been merged, and only for settings that no file in
    the chain declared. Applying them per layer (as before) had two effects:
    a parent without a ``mode`` reset an earlier parent's mode to the default,
    and the default detector list was unioned into every policy, so a policy
    could never enable fewer detectors than the default set.

    Args:
        path: Policy file to resolve.
        seen: Paths currently being resolved, used for cycle detection.
            Callers normally omit it.

    Returns:
        A dict with ``mode``, ``detectors`` and ``custom_detectors`` keys.

    Raises:
        ValueError: on an ``extends`` cycle or a malformed policy file.
    """
    active = set() if seen is None else seen
    merged, declares_detectors = _resolve_policy_layers(path, active)

    merged.setdefault("mode", DEFAULT_POLICY["mode"])
    if not declares_detectors:
        merged["detectors"] = list(DEFAULT_POLICY["detectors"])
    merged["custom_detectors"] = {
        **DEFAULT_POLICY["custom_detectors"],
        **merged["custom_detectors"],
    }
    return merged
103
+
104
+
105
def load_policy(path: Path | None) -> Policy:
    """Load and validate a policy.

    With ``path=None`` the built-in ``DEFAULT_POLICY`` is used; otherwise the
    file is resolved (including ``extends``). Custom detectors are enabled
    automatically, so they need not be repeated in ``detectors``.

    Raises:
        ValueError: invalid mode, malformed ``detectors`` or
            ``custom_detectors``, a custom name that is empty or clashes with
            a built-in detector, or a custom regex that is empty or does not
            compile.
    """
    config = DEFAULT_POLICY if path is None else _resolve_policy_config(path)

    mode = str(config.get("mode", "redact")).lower()
    if mode not in {"redact", "warn", "block"}:
        raise ValueError("policy mode must be one of: redact, warn, block")

    names = config.get("detectors", DEFAULT_POLICY["detectors"])
    names_ok = isinstance(names, list) and all(isinstance(name, str) for name in names)
    if not names_ok:
        raise ValueError("policy detectors must be a list of strings")

    custom_raw = config.get("custom_detectors", {})
    if not isinstance(custom_raw, dict):
        raise ValueError("policy custom_detectors must be an object of {name: regex}")

    reserved = {detector.name for detector in DETECTORS}
    validated: dict[str, str] = {}
    for name, pattern in custom_raw.items():
        if not (isinstance(name, str) and name.strip()):
            raise ValueError("custom detector names must be non-empty strings")
        if name in reserved:
            raise ValueError(f"custom detector name '{name}' conflicts with built-in detector")
        if not (isinstance(pattern, str) and pattern.strip()):
            raise ValueError(f"custom detector '{name}' must have a non-empty regex string")
        try:
            # Compile only to validate; the source string is what gets stored.
            re.compile(pattern)
        except re.error as exc:
            raise ValueError(f"invalid regex for custom detector '{name}': {exc}") from exc
        validated[name] = pattern

    # Every custom detector is switched on without having to be listed twice.
    enabled = set(names) | set(validated)
    return Policy(mode=mode, detectors=enabled, custom_detectors=validated)
141
+
142
+
143
def unknown_detector_names(policy: Policy) -> list[str]:
    """Return, sorted, the enabled detector names that are neither built-in
    nor defined in the policy's custom detectors."""
    known = {detector.name for detector in DETECTORS}
    known.update(policy.custom_detectors)
    unrecognised = [name for name in policy.detectors if name not in known]
    unrecognised.sort()
    return unrecognised
@@ -0,0 +1,250 @@
1
+ Metadata-Version: 2.4
2
+ Name: contextduty
3
+ Version: 0.1.0
4
+ Summary: Policy-driven context firewall for AI workflows — scan and redact sensitive data before prompts, logs, or traces leave your environment.
5
+ Author: ContextDuty Contributors
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/SHUBHAGYTA24/contextduty
8
+ Project-URL: Repository, https://github.com/SHUBHAGYTA24/contextduty
9
+ Project-URL: Bug Tracker, https://github.com/SHUBHAGYTA24/contextduty/issues
10
+ Project-URL: Changelog, https://github.com/SHUBHAGYTA24/contextduty/blob/main/CHANGELOG.md
11
+ Keywords: security,pii,redaction,ai,llm,mcp,prompt,privacy,devsecops,secrets
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Security
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Classifier: Topic :: System :: Logging
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest>=7; extra == "dev"
27
+ Dynamic: license-file
28
+
29
+ # ContextDuty
30
+
31
+ > A policy-driven context firewall for AI workflows. Scan and redact sensitive data before prompts, logs, or traces leave your environment — locally, with no cloud calls.
32
+
33
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/)
34
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
35
+ [![MCP Compatible](https://img.shields.io/badge/MCP-compatible-purple.svg)](https://modelcontextprotocol.io)
36
+
37
+ ---
38
+
39
+ ## Why ContextDuty
40
+
41
+ AI coding assistants and agent workflows are spreading fast. So is accidental data leakage — API keys, emails, and PII flowing into prompts, logs, and traces that may be stored or sent to third-party services.
42
+
43
+ ContextDuty is a **local-first, policy-layered primitive** that fits into any workflow:
44
+ - **CLI** — pipe files through it in CI or pre-commit hooks
45
+ - **MCP server** — Cursor, VS Code, and any MCP client get automatic redaction
46
+ - **Policy inheritance** — teams extend org-wide baselines without copying rules
47
+
48
+ ---
49
+
50
+ ## Why not Presidio?
51
+
52
+ [Microsoft Presidio](https://github.com/microsoft/presidio) is great for NER-based PII detection in data pipelines. ContextDuty solves a different problem:
53
+
54
+ | | ContextDuty | Presidio |
55
+ |---|---|---|
56
+ | Target use case | AI prompts, logs, agent traces | Data pipelines, analytics |
57
+ | MCP-native | ✅ | ❌ |
58
+ | Policy layering (`extends`) | ✅ | ❌ |
59
+ | `block` mode for CI | ✅ | ❌ |
60
+ | Zero dependencies | ✅ | ❌ (heavy NLP stack) |
61
+ | Custom detectors (no code) | ✅ (regex in JSON) | Partial |
62
+ | Deployment | Local CLI / subprocess | Service / SDK |
63
+
64
+ Use Presidio when you need ML-based entity recognition at scale. Use ContextDuty when you need a lightweight, policy-enforceable firewall close to your AI toolchain.
65
+
66
+ ---
67
+
68
+ ## Detection coverage
69
+
70
+ | Detector | Example input | Masked as |
71
+ |---|---|---|
72
+ | `email` | `jane@corp.com` | `<EMAIL_a1b2c3d4e5>` |
73
+ | `phone` | `+1 415-555-1212` | `<PHONE_f6g7h8i9j0>` |
74
+ | `api_key` | `sk_live_ABC123...` | `<API_KEY_k1l2m3n4o5>` |
75
+ | `aws_key` | `AKIA1234567890ABCDEF` | `<AWS_KEY_p6q7r8s9t0>` |
76
+ | `bearer_token` | `Bearer eyJhbGci...` | `<BEARER_TOKEN_u1v2w3x4y5>` |
77
+
78
+ Masks are **deterministic** — the same value always produces the same mask, so you can correlate across log lines without exposing the raw value.
79
+
80
+ ---
81
+
82
+ ## Quickstart
83
+
84
+ ```bash
85
+ pip install contextduty
86
+ contextduty init
87
+ ```
88
+
89
+ Then scan and redact:
90
+
91
+ ```bash
92
+ contextduty scan sample.txt --report report.json
93
+ contextduty redact --in sample.txt --out clean.txt --report report.json
94
+ ```
95
+
96
+ ---
97
+
98
+ ## Commands
99
+
100
+ | Command | Description |
101
+ |---|---|
102
+ | `contextduty init` | Create `.contextduty.json` in the current directory |
103
+ | `contextduty scan <file>` | Scan file, print JSON findings report |
104
+ | `contextduty redact --in <f> --out <f>` | Redact matches, write clean file |
105
+ | `contextduty policy validate --policy <f> [--strict]` | Validate and resolve a layered policy |
106
+
107
+ ---
108
+
109
+ ## MCP server (Cursor / VS Code / any MCP client)
110
+
111
+ ContextDuty runs as an MCP stdio server — drop it into your editor config and your agent can scan and redact text and files by calling the tools listed below.
112
+
113
+ ```bash
114
+ contextduty-mcp
115
+ ```
116
+
117
+ **Cursor** — add to `~/.cursor/mcp.json`:
118
+ ```json
119
+ {
120
+ "mcpServers": {
121
+ "contextduty": {
122
+ "command": "contextduty-mcp"
123
+ }
124
+ }
125
+ }
126
+ ```
127
+
128
+ Exposed tools:
129
+ - `contextduty_scan_text` (`text`, optional `policyPath`)
+ - `contextduty_scan` (`path`, optional `policyPath`)
130
+ - `contextduty_redact` (`inputPath`, `outputPath`, optional `policyPath`)
131
+
132
+ ---
133
+
134
+ ## Policy file
135
+
136
+ Default `.contextduty.json`:
137
+
138
+ ```json
139
+ {
140
+ "mode": "redact",
141
+ "detectors": ["email", "phone", "api_key", "aws_key", "bearer_token"],
142
+ "custom_detectors": {}
143
+ }
144
+ ```
145
+
146
+ **Add a custom detector without touching code:**
147
+
148
+ ```json
149
+ {
150
+ "mode": "redact",
151
+ "detectors": ["email"],
152
+ "custom_detectors": {
153
+ "employee_id": "\\bEMP-[0-9]{6}\\b",
154
+ "internal_ticket": "\\bTICKET-[A-Z]{3}-[0-9]{4}\\b"
155
+ }
156
+ }
157
+ ```
158
+
159
+ `custom_detectors` are auto-enabled — just add the regex entry.
160
+
161
+ **Policy layering for teams and enterprises:**
162
+
163
+ ```json
164
+ {
165
+ "extends": "../../policies/org-baseline.json",
166
+ "mode": "block",
167
+ "detectors": ["internal_ticket"],
168
+ "custom_detectors": {
169
+ "internal_ticket": "\\bTICKET-[A-Z]{3}-[0-9]{4}\\b"
170
+ }
171
+ }
172
+ ```
173
+
174
+ Rules:
175
+ - `extends` can be a string or list (relative file paths)
176
+ - `detectors` are merged (parent + child)
177
+ - `custom_detectors` are merged (child overrides same key)
178
+ - `mode` is overridden by the child policy
179
+ - Cycles in `extends` are rejected with a clear error
180
+
181
+ **Modes:**
182
+
183
+ | Mode | Behaviour |
184
+ |---|---|
185
+ | `redact` | Replace matched values with deterministic masks |
186
+ | `warn` | Report findings, do not change content |
187
+ | `block` | Exit non-zero if findings exist (CI enforcement) |
188
+
189
+ ---
190
+
191
+ ## Compliance policy packs
192
+
193
+ Ready-made baselines for common frameworks — extend them in your own policy file:
194
+
195
+ | Pack | Path | Detectors included |
196
+ |---|---|---|
197
+ | SOC 2 | `policies/soc2-baseline.json` | email, phone, api_key, aws_key, bearer_token |
198
+ | HIPAA | `policies/hipaa-baseline.json` | email, phone + PHI custom patterns |
199
+
200
+ Usage:
201
+ ```json
202
+ {
203
+ "extends": "./node_modules/contextduty/policies/soc2-baseline.json",
204
+ "mode": "block"
205
+ }
206
+ ```
207
+
208
+ ---
209
+
210
+ ## CI integration
211
+
212
+ Add a CI step that scans a file and fails the build when sensitive data is found:
213
+
214
+ ```yaml
215
+ # .github/workflows/contextduty.yml
216
+ - name: Scan for secrets
217
+ run: |
218
+ pip install contextduty
219
+ contextduty scan path/to/file.txt --policy .contextduty.json
220
+ ```
221
+
222
+ Set `"mode": "block"` in your policy so that `contextduty scan` exits non-zero (exit code 2) on any finding; in `redact` and `warn` modes the scan reports findings but still exits 0.
223
+
224
+ ---
225
+
226
+ ## Roadmap
227
+
228
+ - [ ] PyPI publish (`pip install contextduty`)
229
+ - [ ] Streaming JSONL mode for multi-GB datasets
230
+ - [ ] VS Code extension
231
+ - [ ] Policy packs for PCI-DSS
232
+ - [ ] GitHub Action (`uses: contextduty/action@v1`)
233
+
234
+ ---
235
+
236
+ ## Open source
237
+
238
+ | File | Purpose |
239
+ |---|---|
240
+ | `LICENSE` | MIT |
241
+ | `SECURITY.md` | Vulnerability reporting |
242
+ | `CONTRIBUTING.md` | How to contribute |
243
+ | `CODE_OF_CONDUCT.md` | Community standards |
244
+ | `CHANGELOG.md` | Version history |
245
+
246
+ ---
247
+
248
+ ## Contributing
249
+
250
+ Issues, PRs, and policy pack contributions are very welcome. See [CONTRIBUTING.md](CONTRIBUTING.md) to get started.
@@ -0,0 +1,12 @@
1
+ contextduty/__init__.py,sha256=RsYWwXsCEQFdXkuVg6YAvEgMaB2O7Ui-7kaIVx69k1I,94
2
+ contextduty/cli.py,sha256=CGrAXIb4GgwzXgGQFXUhEx5A1z3JzutQ5hGCXbY8UX4,4837
3
+ contextduty/detectors.py,sha256=wGzUtWX5lOaeIPhEtX1ZkvzPSPBh_jUeLbBZUXydbqU,932
4
+ contextduty/engine.py,sha256=KIStKqJgu6AWmjbJfqrYNiaxO_exSOVf7s2736eoutA,4442
5
+ contextduty/mcp_server.py,sha256=T2453N2UzQag1zAtvtwnEUnY9Mw-KJXvKf0TiLEvIPg,7671
6
+ contextduty/policy.py,sha256=Bh_5x0LOtDkSUgaLPROWkSsDYo9YGIWIVee0-waOUHA,5401
7
+ contextduty-0.1.0.dist-info/licenses/LICENSE,sha256=vR5-qkFfm6Jm_XXmjmaXDwpoRVNuf7vVXiCmqwpUWqc,1081
8
+ contextduty-0.1.0.dist-info/METADATA,sha256=gRS2NkYtKMLRaQ9VvlkvahqqZzx972wmEbFo93PLMvo,7341
9
+ contextduty-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
10
+ contextduty-0.1.0.dist-info/entry_points.txt,sha256=Pf4EFfq94zaghOkbljA5naebe82r5sIgLuuY1gWlq1s,99
11
+ contextduty-0.1.0.dist-info/top_level.txt,sha256=rNp859RPbeDgQ_q--0-JeSV0uBsfzWNU5-gEdukJ108,12
12
+ contextduty-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ contextduty = contextduty.cli:main
3
+ contextduty-mcp = contextduty.mcp_server:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 ContextDuty Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ contextduty