draft-protocol 1.1.0__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- draft_protocol-1.2.0/CHANGELOG.md +115 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/PKG-INFO +27 -11
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/README.md +26 -10
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/pyproject.toml +1 -1
- draft_protocol-1.2.0/scripts/tmp/run_lint.ps1 +3 -0
- draft_protocol-1.2.0/scripts/tmp/run_new_tests.ps1 +3 -0
- draft_protocol-1.2.0/scripts/tmp/run_tests.ps1 +4 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/src/draft_protocol/__init__.py +32 -1
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/src/draft_protocol/config.py +119 -15
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/src/draft_protocol/engine.py +785 -46
- draft_protocol-1.2.0/src/draft_protocol/extension_points.py +74 -0
- draft_protocol-1.2.0/src/draft_protocol/hmac_utils.py +71 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/src/draft_protocol/server.py +108 -50
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/src/draft_protocol/storage.py +20 -1
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/tests/test_draft_protocol.py +12 -8
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/tests/test_rest.py +3 -3
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/tests/test_security.py +3 -3
- draft_protocol-1.2.0/tests/test_v1_1_features.py +386 -0
- draft_protocol-1.2.0/tests/test_v1_2_features.py +354 -0
- draft_protocol-1.1.0/CHANGELOG.md +0 -62
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/.dockerignore +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/.editorconfig +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/.github/CODEOWNERS +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/.github/dependabot.yml +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/.github/workflows/ci.yml +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/.github/workflows/release.yml +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/.gitignore +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/.pre-commit-config.yaml +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/AGENTS.md +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/BENCHMARKS.md +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/CODE_OF_CONDUCT.md +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/CONFORMANCE.md +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/CONTRIBUTING.md +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/Dockerfile +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/INTEGRATIONS.md +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/LICENSE +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/METHODOLOGY.md +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/Makefile +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/RELEASING.md +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/ROADMAP.md +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/RULES.md +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/SECURITY.md +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/STRUCTURE.md +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/THREAT_MODEL.md +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/docker-compose.example.yml +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/docs/README.md +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/docs/api.md +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/docs/architecture.md +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/examples/README.md +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/examples/basic_usage.py +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/extension/background.js +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/extension/content.css +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/extension/content.js +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/extension/icons/icon128.png +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/extension/icons/icon16.png +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/extension/icons/icon48.png +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/extension/manifest.json +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/extension/popup.html +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/extension/popup.js +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/extension/sidepanel.html +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/extension/sidepanel.js +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/src/draft_protocol/__main__.py +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/src/draft_protocol/providers.py +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/src/draft_protocol/py.typed +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/src/draft_protocol/rest.py +0 -0
- {draft_protocol-1.1.0 → draft_protocol-1.2.0}/tests/conftest.py +0 -0
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [1.2.0] - 2026-03-08
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- **5-Tier Classification** — TRIVIAL, LOOKUP, TASK, MULTI, CONSEQUENTIAL replace the 3-tier CASUAL/STANDARD/CONSEQUENTIAL system. Finer-grained governance matching actual task complexity.
|
|
14
|
+
- **Open Elicitation Phase** — `open_elicitation()` adds unstructured intent gathering (Cognitive Interview) before dimension mapping for TASK+ tiers. Prevents anchoring bias.
|
|
15
|
+
- **Assumption Quality Scoring** — `score_assumptions()` rates each assumption on falsifiability, impact, and novelty (0-1 each). Low-quality assumptions flagged for replacement.
|
|
16
|
+
- **Ceremony Depth** — `get_ceremony_depth()` returns tier-appropriate governance visibility: invisible (TRIVIAL), tag (LOOKUP), semi_visible (TASK), visible (MULTI), full (CONSEQUENTIAL).
|
|
17
|
+
- **Legacy Tier Compatibility** — CASUAL/STANDARD/CONSEQUENTIAL still accepted, auto-mapped to new tiers. `resolve_tier_override()` and `get_legacy_tier()` for bidirectional mapping.
|
|
18
|
+
- **MCP Tool: `draft_open_elicit`** — open elicitation as a dedicated tool for TASK+ sessions.
|
|
19
|
+
- 57 new tests covering 5-tier classification, open elicitation, ceremony depth, legacy compat, assumption scoring. Total suite: 197 tests.
|
|
20
|
+
|
|
21
|
+
### Changed
|
|
22
|
+
- Tier classification engine now uses 5-tier keyword sets with priority ordering (T4 > T3 > T2 > T1 > T0)
|
|
23
|
+
- Extraction attack patterns moved from STANDARD_TRIGGERS to CONSEQUENTIAL_TRIGGERS (security fix)
|
|
24
|
+
- `create_session()` auto-maps legacy tier names to 5-tier equivalents
|
|
25
|
+
- Assumption count scales across 5 tiers: 0 (TRIVIAL), 1 (LOOKUP), 2 (TASK), 3 (MULTI), 5 (CONSEQUENTIAL)
|
|
26
|
+
|
|
27
|
+
## [1.1.0] - 2026-03-07
|
|
28
|
+
|
|
29
|
+
### Added
|
|
30
|
+
- **Batch Operations** — `confirm_batch`, `quick_confirm_satisfied`, `verify_batch` reduce tool call overhead by 50-60% per session
|
|
31
|
+
- **LLM-Powered Adversarial Assumptions** — when LLM is available, generates genuinely falsifiable claims instead of restating confirmed fields (fixes CF-011 rubber-stamp problem)
|
|
32
|
+
- **Devil's Advocate at All Tiers** — scaled intensity: CASUAL 1-2, STANDARD 2-3, CONSEQUENTIAL 3-5 assumptions
|
|
33
|
+
- **Hard Extraction Enforcement** — strips fabricated extracted text from AMBIGUOUS/MISSING fields in LLM assessments
|
|
34
|
+
- **Escalation/De-escalation** — `escalate_tier` and `deescalate_tier` with audit trail and reason logging
|
|
35
|
+
- **Collaborative Framing** — PEACE + Motivational Interviewing framing hints on all elicitation questions
|
|
36
|
+
- **Perfunctory Confirmation Detection** — warns on repeated identical values, known rubber-stamp patterns (DFT-08)
|
|
37
|
+
- **Session Analytics** — `elicitation_review` includes metrics: field counts, confidence distribution, assumption rejection rate
|
|
38
|
+
- **M1.3 Closed Session Guards** — all engine functions reject operations on closed sessions
|
|
39
|
+
- **M1.4 Tier Enum Validation** — strict tier validation at intake
|
|
40
|
+
- **M1.5 Context Enrichment on Gate PASS** — gate results include full dimensional context
|
|
41
|
+
- 35 new v1.1 feature tests + 25 hardening tests, total suite: 140 tests
|
|
42
|
+
- 18 MCP tools (3 new: `draft_confirm_batch`, `draft_quick_confirm`, `draft_verify_batch`; 2 new: `draft_escalate`, `draft_deescalate`)
|
|
43
|
+
|
|
44
|
+
### Changed
|
|
45
|
+
- Assumption generation scales by tier instead of fixed count
|
|
46
|
+
- Gate results include perfunctory warnings alongside blockers
|
|
47
|
+
- Elicitation review returns analytics block with session-level metrics
|
|
48
|
+
- Test suite expanded from 80 to 140 tests, zero regressions
|
|
49
|
+
|
|
50
|
+
## [1.0.0] - 2026-02-27
|
|
51
|
+
|
|
52
|
+
### Added
|
|
53
|
+
- First stable release — all core features production-hardened
|
|
54
|
+
- Python Semantic Release (PSR) automated CI/CD pipeline
|
|
55
|
+
- Automated version bumps, changelogs, tagging, and PyPI publishing
|
|
56
|
+
|
|
57
|
+
### Changed
|
|
58
|
+
- Promoted from 0.x to 1.0 — API considered stable
|
|
59
|
+
|
|
60
|
+
## [0.1.1] - 2026-02-25
|
|
61
|
+
|
|
62
|
+
### Added
|
|
63
|
+
- `RELEASING.md` — release gate checklist to prevent builder's blindness
|
|
64
|
+
- Minimal working example (end-to-end transcript) in README
|
|
65
|
+
- Conformance quick-scan index table at top of `CONFORMANCE.md`
|
|
66
|
+
- `docs/architecture.md` — system design, pipeline flow, security model, file layout
|
|
67
|
+
- `docs/api.md` — REST API reference with all endpoints, request/response examples
|
|
68
|
+
- `examples/basic_usage.py` — library usage example (no server needed)
|
|
69
|
+
- `tests/test_security.py` — prompt injection, bypass, input validation tests
|
|
70
|
+
- `tests/test_rest.py` — REST API endpoint tests with mock handler
|
|
71
|
+
- `Dockerfile` — production container (Python 3.13-slim, non-root, SSE default)
|
|
72
|
+
- `docker-compose.example.yml` — example stack with DRAFT + Ollama
|
|
73
|
+
- `.dockerignore` — keep Docker image clean
|
|
74
|
+
- `.github/ISSUE_TEMPLATE/bug_report.yml` — structured bug report template
|
|
75
|
+
- `.github/ISSUE_TEMPLATE/feature_request.yml` — structured feature request template
|
|
76
|
+
- `.github/PULL_REQUEST_TEMPLATE.md` — PR checklist
|
|
77
|
+
|
|
78
|
+
### Fixed
|
|
79
|
+
- `rest.py` `/status` endpoint called nonexistent `storage.get_session_state()` — replaced with inline session + gate query
|
|
80
|
+
- Documented Anthropic embeddings limitation (voyage model not supported, use `text-embedding-3-small` or Ollama)
|
|
81
|
+
- Added localhost-only security warning to REST API docs
|
|
82
|
+
|
|
83
|
+
### Changed
|
|
84
|
+
- `docs/README.md` updated to documentation index linking all docs
|
|
85
|
+
- `STRUCTURE.md` updated to reflect all new files
|
|
86
|
+
|
|
87
|
+
## [0.1.0] - 2025-02-21
|
|
88
|
+
|
|
89
|
+
### Added
|
|
90
|
+
- 15 MCP tools for structured intent elicitation via FastMCP
|
|
91
|
+
- Three-tier automatic classification: Casual, Standard, Consequential
|
|
92
|
+
- Five-dimension mapping: Define, Rules, Artifacts, Flex, Test
|
|
93
|
+
- Confirmation gate blocks execution until all fields verified
|
|
94
|
+
- Assumptions surfacing with Devil's Advocate support
|
|
95
|
+
- Dimension screening for non-mandatory dimensions (R, A, F)
|
|
96
|
+
- Gate override with audit trail for founder use
|
|
97
|
+
- Elicitation review with quality self-assessment
|
|
98
|
+
- Multi-provider LLM support: Ollama, OpenAI, Anthropic, any OpenAI-compatible API
|
|
99
|
+
- Auto-detection of provider from model name
|
|
100
|
+
- Graceful degradation to keyword heuristics without LLM
|
|
101
|
+
- Prompt extraction attack detection (OWASP LLM07)
|
|
102
|
+
- Empty/whitespace input rejection at all entry points
|
|
103
|
+
- Full SQLite audit trail
|
|
104
|
+
- REST API with CORS for Chrome extension and HTTP clients
|
|
105
|
+
- Chrome extension for any AI chat (ChatGPT, Claude, Gemini, etc.)
|
|
106
|
+
- 46 tests covering security, lifecycle, governance, and provider configuration
|
|
107
|
+
- AGENTS.md, RULES.md, STRUCTURE.md for AI agent compatibility
|
|
108
|
+
- Professional repo infrastructure: CONTRIBUTING, SECURITY, CODE_OF_CONDUCT
|
|
109
|
+
|
|
110
|
+
[Unreleased]: https://github.com/manifold-vectors/draft-protocol/compare/v1.2.0...HEAD
|
|
111
|
+
[1.2.0]: https://github.com/manifold-vectors/draft-protocol/compare/v1.1.0...v1.2.0
|
|
112
|
+
[1.1.0]: https://github.com/manifold-vectors/draft-protocol/compare/v1.0.0...v1.1.0
|
|
113
|
+
[1.0.0]: https://github.com/manifold-vectors/draft-protocol/compare/v0.1.1...v1.0.0
|
|
114
|
+
[0.1.1]: https://github.com/manifold-vectors/draft-protocol/compare/v0.1.0...v0.1.1
|
|
115
|
+
[0.1.0]: https://github.com/manifold-vectors/draft-protocol/releases/tag/v0.1.0
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: draft-protocol
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: DRAFT Protocol — Intake governance for AI tool calls. Ensures AI understands human intent before execution begins.
|
|
5
5
|
Project-URL: Homepage, https://github.com/manifold-vectors/draft-protocol
|
|
6
6
|
Project-URL: Documentation, https://github.com/manifold-vectors/draft-protocol#readme
|
|
@@ -56,7 +56,7 @@ pip install draft-protocol
|
|
|
56
56
|
| **When it acts** | After the LLM responds | Before the LLM acts |
|
|
57
57
|
| **What it checks** | Toxicity, format, policy | Intent, scope, assumptions |
|
|
58
58
|
| **Failure mode** | Catches bad output, wastes the call | Prevents bad calls entirely |
|
|
59
|
-
| **Evidence basis** | Synthetic benchmarks |
|
|
59
|
+
| **Evidence basis** | Synthetic benchmarks | 140 tests + governed sessions |
|
|
60
60
|
| **Complementary?** | Yes | Yes — use both for defense-in-depth |
|
|
61
61
|
|
|
62
62
|
## The Problem
|
|
@@ -85,11 +85,22 @@ Each field is labeled **SATISFIED**, **AMBIGUOUS**, or **MISSING**. Ambiguous an
|
|
|
85
85
|
|
|
86
86
|
Not every message needs the same scrutiny:
|
|
87
87
|
|
|
88
|
-
- **CASUAL** — "What's the weather?" → Internal mapping only. No visible ceremony.
|
|
89
|
-
- **STANDARD** — "Build a REST API" → Full pipeline. Questions for gaps. Assumptions surfaced.
|
|
90
|
-
- **CONSEQUENTIAL** — "Restructure the auth system" → Maximum rigor. All dimensions mandatory. Devil's Advocate
|
|
88
|
+
- **CASUAL** — "What's the weather?" → Internal mapping only. No visible ceremony. 1-2 lightweight assumptions.
|
|
89
|
+
- **STANDARD** — "Build a REST API" → Full pipeline. Questions for gaps. Assumptions surfaced with light Devil's Advocate. Batch confirm for efficiency.
|
|
90
|
+
- **CONSEQUENTIAL** — "Restructure the auth system" → Maximum rigor. All dimensions mandatory. 3-5 adversarial assumptions with full Devil's Advocate. Quality review required. Perfunctory confirmation detection.
|
|
91
91
|
|
|
92
|
-
Tier classification is automatic (keyword matching + optional LLM), with manual
|
|
92
|
+
Tier classification is automatic (keyword matching + optional LLM), with manual escalation/de-escalation.
|
|
93
|
+
|
|
94
|
+
### What's New in v1.1
|
|
95
|
+
|
|
96
|
+
- **Batch operations** — `confirm_batch`, `quick_confirm`, `verify_batch` cut tool call overhead by 50-60%
|
|
97
|
+
- **Adversarial assumptions** — LLM-powered assumption generation creates genuinely falsifiable claims instead of restating confirmed fields
|
|
98
|
+
- **Devil's Advocate at all tiers** — scaled intensity: 1-2 (casual), 2-3 (standard), 3-5 (consequential)
|
|
99
|
+
- **Hard extraction enforcement** — strips fabricated text from ambiguous/missing fields
|
|
100
|
+
- **Collaborative framing** — elicitation questions use PEACE + Motivational Interviewing framing
|
|
101
|
+
- **Perfunctory detection** — warns on rubber-stamp confirmations ("yes", repeated values)
|
|
102
|
+
- **Session analytics** — field counts, confidence distribution, assumption rejection rates
|
|
103
|
+
- **Escalate/de-escalate** — manual tier changes with full audit trail
|
|
93
104
|
|
|
94
105
|
## Quick Start
|
|
95
106
|
|
|
@@ -301,15 +312,18 @@ Add the `env` block to any MCP client config above. With an LLM, DRAFT gets sema
|
|
|
301
312
|
|------|---------|
|
|
302
313
|
| `draft_intake` | Start a session. Classifies tier automatically. |
|
|
303
314
|
| `draft_map` | Map all 5 dimensions against your context. |
|
|
304
|
-
| `draft_elicit` | Generate questions for gaps. |
|
|
315
|
+
| `draft_elicit` | Generate questions for gaps (with collaborative framing). |
|
|
305
316
|
| `draft_confirm` | Record your answer for a field. |
|
|
306
|
-
| `
|
|
317
|
+
| `draft_confirm_batch` | Confirm multiple fields in one call (50-60% fewer tool calls). |
|
|
318
|
+
| `draft_quick_confirm` | Promote all auto-extracted fields to confirmed in one call. |
|
|
319
|
+
| `draft_assumptions` | Surface key assumptions as falsifiable claims (tier-scaled DA). |
|
|
307
320
|
| `draft_verify` | Confirm or reject an assumption. |
|
|
321
|
+
| `draft_verify_batch` | Verify or reject multiple assumptions in one call. |
|
|
308
322
|
| `draft_gate` | Check if all fields are confirmed. Blocks execution if not. |
|
|
309
|
-
| `draft_review` | Quality self-assessment
|
|
323
|
+
| `draft_review` | Quality self-assessment with session analytics. |
|
|
310
324
|
| `draft_status` | View current session state. |
|
|
311
|
-
| `draft_escalate` | Manually increase tier. |
|
|
312
|
-
| `draft_deescalate` | Manually decrease tier (logged). |
|
|
325
|
+
| `draft_escalate` | Manually increase tier (with audit trail). |
|
|
326
|
+
| `draft_deescalate` | Manually decrease tier (logged, honored). |
|
|
313
327
|
| `draft_unscreen` | Reverse a dimension marked N/A. |
|
|
314
328
|
| `draft_add_assumption` | Add a manual or Devil's Advocate assumption. |
|
|
315
329
|
| `draft_override` | Override a blocked gate (logged, auditable). |
|
|
@@ -383,6 +397,8 @@ curl -s -X POST http://127.0.0.1:8420/gate \
|
|
|
383
397
|
DRAFT includes hardened input validation:
|
|
384
398
|
- Empty/whitespace message rejection at intake
|
|
385
399
|
- Minimum content threshold on field confirmations (prevents bypass)
|
|
400
|
+
- Perfunctory confirmation detection — warns on rubber-stamp patterns (DFT-08)
|
|
401
|
+
- Hard extraction enforcement — strips fabricated text from non-satisfied fields
|
|
386
402
|
- Empty dimension detection at gate check
|
|
387
403
|
- Prompt extraction pattern detection (OWASP LLM07) — automatically escalates suspicious messages
|
|
388
404
|
- Full audit trail in SQLite (every tool call logged with timestamp)
|
|
@@ -18,7 +18,7 @@ pip install draft-protocol
|
|
|
18
18
|
| **When it acts** | After the LLM responds | Before the LLM acts |
|
|
19
19
|
| **What it checks** | Toxicity, format, policy | Intent, scope, assumptions |
|
|
20
20
|
| **Failure mode** | Catches bad output, wastes the call | Prevents bad calls entirely |
|
|
21
|
-
| **Evidence basis** | Synthetic benchmarks |
|
|
21
|
+
| **Evidence basis** | Synthetic benchmarks | 140 tests + governed sessions |
|
|
22
22
|
| **Complementary?** | Yes | Yes — use both for defense-in-depth |
|
|
23
23
|
|
|
24
24
|
## The Problem
|
|
@@ -47,11 +47,22 @@ Each field is labeled **SATISFIED**, **AMBIGUOUS**, or **MISSING**. Ambiguous an
|
|
|
47
47
|
|
|
48
48
|
Not every message needs the same scrutiny:
|
|
49
49
|
|
|
50
|
-
- **CASUAL** — "What's the weather?" → Internal mapping only. No visible ceremony.
|
|
51
|
-
- **STANDARD** — "Build a REST API" → Full pipeline. Questions for gaps. Assumptions surfaced.
|
|
52
|
-
- **CONSEQUENTIAL** — "Restructure the auth system" → Maximum rigor. All dimensions mandatory. Devil's Advocate
|
|
50
|
+
- **CASUAL** — "What's the weather?" → Internal mapping only. No visible ceremony. 1-2 lightweight assumptions.
|
|
51
|
+
- **STANDARD** — "Build a REST API" → Full pipeline. Questions for gaps. Assumptions surfaced with light Devil's Advocate. Batch confirm for efficiency.
|
|
52
|
+
- **CONSEQUENTIAL** — "Restructure the auth system" → Maximum rigor. All dimensions mandatory. 3-5 adversarial assumptions with full Devil's Advocate. Quality review required. Perfunctory confirmation detection.
|
|
53
53
|
|
|
54
|
-
Tier classification is automatic (keyword matching + optional LLM), with manual
|
|
54
|
+
Tier classification is automatic (keyword matching + optional LLM), with manual escalation/de-escalation.
|
|
55
|
+
|
|
56
|
+
### What's New in v1.1
|
|
57
|
+
|
|
58
|
+
- **Batch operations** — `confirm_batch`, `quick_confirm`, `verify_batch` cut tool call overhead by 50-60%
|
|
59
|
+
- **Adversarial assumptions** — LLM-powered assumption generation creates genuinely falsifiable claims instead of restating confirmed fields
|
|
60
|
+
- **Devil's Advocate at all tiers** — scaled intensity: 1-2 (casual), 2-3 (standard), 3-5 (consequential)
|
|
61
|
+
- **Hard extraction enforcement** — strips fabricated text from ambiguous/missing fields
|
|
62
|
+
- **Collaborative framing** — elicitation questions use PEACE + Motivational Interviewing framing
|
|
63
|
+
- **Perfunctory detection** — warns on rubber-stamp confirmations ("yes", repeated values)
|
|
64
|
+
- **Session analytics** — field counts, confidence distribution, assumption rejection rates
|
|
65
|
+
- **Escalate/de-escalate** — manual tier changes with full audit trail
|
|
55
66
|
|
|
56
67
|
## Quick Start
|
|
57
68
|
|
|
@@ -263,15 +274,18 @@ Add the `env` block to any MCP client config above. With an LLM, DRAFT gets sema
|
|
|
263
274
|
|------|---------|
|
|
264
275
|
| `draft_intake` | Start a session. Classifies tier automatically. |
|
|
265
276
|
| `draft_map` | Map all 5 dimensions against your context. |
|
|
266
|
-
| `draft_elicit` | Generate questions for gaps. |
|
|
277
|
+
| `draft_elicit` | Generate questions for gaps (with collaborative framing). |
|
|
267
278
|
| `draft_confirm` | Record your answer for a field. |
|
|
268
|
-
| `
|
|
279
|
+
| `draft_confirm_batch` | Confirm multiple fields in one call (50-60% fewer tool calls). |
|
|
280
|
+
| `draft_quick_confirm` | Promote all auto-extracted fields to confirmed in one call. |
|
|
281
|
+
| `draft_assumptions` | Surface key assumptions as falsifiable claims (tier-scaled DA). |
|
|
269
282
|
| `draft_verify` | Confirm or reject an assumption. |
|
|
283
|
+
| `draft_verify_batch` | Verify or reject multiple assumptions in one call. |
|
|
270
284
|
| `draft_gate` | Check if all fields are confirmed. Blocks execution if not. |
|
|
271
|
-
| `draft_review` | Quality self-assessment
|
|
285
|
+
| `draft_review` | Quality self-assessment with session analytics. |
|
|
272
286
|
| `draft_status` | View current session state. |
|
|
273
|
-
| `draft_escalate` | Manually increase tier. |
|
|
274
|
-
| `draft_deescalate` | Manually decrease tier (logged). |
|
|
287
|
+
| `draft_escalate` | Manually increase tier (with audit trail). |
|
|
288
|
+
| `draft_deescalate` | Manually decrease tier (logged, honored). |
|
|
275
289
|
| `draft_unscreen` | Reverse a dimension marked N/A. |
|
|
276
290
|
| `draft_add_assumption` | Add a manual or Devil's Advocate assumption. |
|
|
277
291
|
| `draft_override` | Override a blocked gate (logged, auditable). |
|
|
@@ -345,6 +359,8 @@ curl -s -X POST http://127.0.0.1:8420/gate \
|
|
|
345
359
|
DRAFT includes hardened input validation:
|
|
346
360
|
- Empty/whitespace message rejection at intake
|
|
347
361
|
- Minimum content threshold on field confirmations (prevents bypass)
|
|
362
|
+
- Perfunctory confirmation detection — warns on rubber-stamp patterns (DFT-08)
|
|
363
|
+
- Hard extraction enforcement — strips fabricated text from non-satisfied fields
|
|
348
364
|
- Empty dimension detection at gate check
|
|
349
365
|
- Prompt extraction pattern detection (OWASP LLM07) — automatically escalates suspicious messages
|
|
350
366
|
- Full audit trail in SQLite (every tool call logged with timestamp)
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "draft-protocol"
|
|
7
|
-
version = "1.1.0"
|
|
7
|
+
version = "1.2.0"
|
|
8
8
|
description = "DRAFT Protocol — Intake governance for AI tool calls. Ensures AI understands human intent before execution begins."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "Apache-2.0"
|
|
@@ -13,21 +13,37 @@ Usage:
|
|
|
13
13
|
from draft_protocol.providers import llm_available, embed_available
|
|
14
14
|
"""
|
|
15
15
|
|
|
16
|
-
__version__ = "1.1.0"
|
|
16
|
+
__version__ = "1.2.0"
|
|
17
17
|
|
|
18
18
|
# Public API — importable from `draft_protocol` directly
|
|
19
19
|
from draft_protocol.engine import (
|
|
20
20
|
add_assumption,
|
|
21
21
|
check_gate,
|
|
22
22
|
classify_tier,
|
|
23
|
+
confirm_batch,
|
|
23
24
|
confirm_field,
|
|
25
|
+
deescalate_tier,
|
|
24
26
|
elicitation_review,
|
|
27
|
+
escalate_tier,
|
|
25
28
|
generate_assumptions,
|
|
26
29
|
generate_elicitation,
|
|
30
|
+
get_ceremony_depth,
|
|
31
|
+
get_legacy_tier,
|
|
27
32
|
map_dimensions,
|
|
33
|
+
open_elicitation,
|
|
28
34
|
override_gate,
|
|
35
|
+
quick_confirm_satisfied,
|
|
36
|
+
resolve_tier_override,
|
|
37
|
+
score_assumptions,
|
|
29
38
|
unscreen_dimension,
|
|
30
39
|
verify_assumption,
|
|
40
|
+
verify_batch,
|
|
41
|
+
)
|
|
42
|
+
from draft_protocol.extension_points import (
|
|
43
|
+
clear_all_hooks,
|
|
44
|
+
register_classify_hook,
|
|
45
|
+
register_post_gate_hook,
|
|
46
|
+
register_storage_path_hook,
|
|
31
47
|
)
|
|
32
48
|
from draft_protocol.providers import (
|
|
33
49
|
embed_available,
|
|
@@ -44,16 +60,26 @@ __all__ = [
|
|
|
44
60
|
"__version__",
|
|
45
61
|
# Engine
|
|
46
62
|
"classify_tier",
|
|
63
|
+
"resolve_tier_override",
|
|
64
|
+
"get_legacy_tier",
|
|
65
|
+
"get_ceremony_depth",
|
|
47
66
|
"map_dimensions",
|
|
48
67
|
"generate_elicitation",
|
|
68
|
+
"open_elicitation",
|
|
49
69
|
"generate_assumptions",
|
|
70
|
+
"score_assumptions",
|
|
50
71
|
"check_gate",
|
|
51
72
|
"confirm_field",
|
|
73
|
+
"confirm_batch",
|
|
74
|
+
"quick_confirm_satisfied",
|
|
75
|
+
"verify_batch",
|
|
52
76
|
"unscreen_dimension",
|
|
53
77
|
"add_assumption",
|
|
54
78
|
"override_gate",
|
|
55
79
|
"verify_assumption",
|
|
56
80
|
"elicitation_review",
|
|
81
|
+
"escalate_tier",
|
|
82
|
+
"deescalate_tier",
|
|
57
83
|
# Storage
|
|
58
84
|
"create_session",
|
|
59
85
|
"get_session",
|
|
@@ -62,4 +88,9 @@ __all__ = [
|
|
|
62
88
|
# Providers
|
|
63
89
|
"llm_available",
|
|
64
90
|
"embed_available",
|
|
91
|
+
# Extension Points
|
|
92
|
+
"register_classify_hook",
|
|
93
|
+
"register_post_gate_hook",
|
|
94
|
+
"register_storage_path_hook",
|
|
95
|
+
"clear_all_hooks",
|
|
65
96
|
]
|
|
@@ -29,6 +29,62 @@ if LLM_PROVIDER == "none" and LLM_MODEL:
|
|
|
29
29
|
elif LLM_MODEL:
|
|
30
30
|
LLM_PROVIDER = "ollama" # Default to Ollama for unknown models
|
|
31
31
|
|
|
32
|
+
# ── 5-Tier Classification (GDE v1 port) ───────────────────
|
|
33
|
+
# Priority: T4 > T3 > T2 > T1 > T0 (highest risk wins)
|
|
34
|
+
|
|
35
|
+
ALL_TIERS = ("TRIVIAL", "LOOKUP", "TASK", "MULTI", "CONSEQUENTIAL")
|
|
36
|
+
|
|
37
|
+
TIER_RISK = {
|
|
38
|
+
"TRIVIAL": 0.0,
|
|
39
|
+
"LOOKUP": 0.1,
|
|
40
|
+
"TASK": 0.3,
|
|
41
|
+
"MULTI": 0.6,
|
|
42
|
+
"CONSEQUENTIAL": 0.9,
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
# Legacy tier mapping (backward compat)
|
|
46
|
+
LEGACY_MAP = {
|
|
47
|
+
"CASUAL": "TRIVIAL",
|
|
48
|
+
"STANDARD": "TASK",
|
|
49
|
+
"CONSEQUENTIAL": "CONSEQUENTIAL",
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
# Reverse: new tier → legacy name (for systems expecting old names)
|
|
53
|
+
TIER_TO_LEGACY = {
|
|
54
|
+
"TRIVIAL": "CASUAL",
|
|
55
|
+
"LOOKUP": "CASUAL",
|
|
56
|
+
"TASK": "STANDARD",
|
|
57
|
+
"MULTI": "STANDARD",
|
|
58
|
+
"CONSEQUENTIAL": "CONSEQUENTIAL",
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
# Per-tier DRAFT ceremony depth
|
|
62
|
+
TIER_CEREMONY = {
|
|
63
|
+
"TRIVIAL": "invisible", # No output — internal only
|
|
64
|
+
"LOOKUP": "tag", # One-line classification tag
|
|
65
|
+
"TASK": "semi_visible", # Summary line + quick_confirm path
|
|
66
|
+
"MULTI": "visible", # Visible mapping + targeted elicitation (D+T min)
|
|
67
|
+
"CONSEQUENTIAL": "full", # Full 7-step, DA, review mandatory
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
# Per-tier assumption counts
|
|
71
|
+
TIER_ASSUMPTIONS = {
|
|
72
|
+
"TRIVIAL": 0,
|
|
73
|
+
"LOOKUP": 1,
|
|
74
|
+
"TASK": 2,
|
|
75
|
+
"MULTI": 3,
|
|
76
|
+
"CONSEQUENTIAL": 5,
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
# Per-tier Guardian rule sets
|
|
80
|
+
TIER_GUARDIAN_RULES = {
|
|
81
|
+
"TRIVIAL": ["G1", "G3"],
|
|
82
|
+
"LOOKUP": ["G1", "G3"],
|
|
83
|
+
"TASK": ["G1", "G2", "G3", "G4", "G5", "G6", "G8"],
|
|
84
|
+
"MULTI": ["G1", "G2", "G3", "G4", "G5", "G6", "G7", "G8"],
|
|
85
|
+
"CONSEQUENTIAL": ["G1", "G2", "G3", "G4", "G5", "G6", "G7", "G8"],
|
|
86
|
+
}
|
|
87
|
+
|
|
32
88
|
# ── Tier Classification Triggers ──────────────────────────
|
|
33
89
|
# Keyword-based fast path. LLM classification (if available) handles ambiguous cases.
|
|
34
90
|
|
|
@@ -53,6 +109,21 @@ CONSEQUENTIAL_TRIGGERS = [
|
|
|
53
109
|
"production deployment",
|
|
54
110
|
"security policy",
|
|
55
111
|
"auth modification",
|
|
112
|
+
# Security: extraction-pattern triggers (OWASP LLM07) — always T4
|
|
113
|
+
"ignore previous instructions",
|
|
114
|
+
"ignore all previous",
|
|
115
|
+
"ignore above",
|
|
116
|
+
"repeat above",
|
|
117
|
+
"repeat everything",
|
|
118
|
+
"verbatim",
|
|
119
|
+
"system prompt",
|
|
120
|
+
"print environment",
|
|
121
|
+
"environment variables",
|
|
122
|
+
"show me your instructions",
|
|
123
|
+
"what are your rules",
|
|
124
|
+
"dump your config",
|
|
125
|
+
"reveal your prompt",
|
|
126
|
+
"debug mode",
|
|
56
127
|
]
|
|
57
128
|
|
|
58
129
|
STANDARD_TRIGGERS = [
|
|
@@ -76,23 +147,56 @@ STANDARD_TRIGGERS = [
|
|
|
76
147
|
"proposal",
|
|
77
148
|
"pipeline",
|
|
78
149
|
"workflow",
|
|
79
|
-
# Security: extraction-pattern triggers (OWASP LLM07)
|
|
80
|
-
"ignore previous instructions",
|
|
81
|
-
"ignore all previous",
|
|
82
|
-
"ignore above",
|
|
83
|
-
"repeat above",
|
|
84
|
-
"repeat everything",
|
|
85
|
-
"verbatim",
|
|
86
|
-
"system prompt",
|
|
87
|
-
"print environment",
|
|
88
|
-
"environment variables",
|
|
89
|
-
"show me your instructions",
|
|
90
|
-
"what are your rules",
|
|
91
|
-
"dump your config",
|
|
92
|
-
"reveal your prompt",
|
|
93
|
-
"debug mode",
|
|
94
150
|
]
|
|
95
151
|
|
|
152
|
+
MULTI_TRIGGERS = [
|
|
153
|
+
"docker-compose",
|
|
154
|
+
"docker compose",
|
|
155
|
+
".env",
|
|
156
|
+
"scheduled task",
|
|
157
|
+
"cron",
|
|
158
|
+
"qdrant",
|
|
159
|
+
"postgresql",
|
|
160
|
+
"postgres",
|
|
161
|
+
"sync",
|
|
162
|
+
"migrate",
|
|
163
|
+
"refactor",
|
|
164
|
+
"sweep",
|
|
165
|
+
"consolidate",
|
|
166
|
+
"rename",
|
|
167
|
+
"restructure",
|
|
168
|
+
"reorg",
|
|
169
|
+
"batch",
|
|
170
|
+
"across multiple",
|
|
171
|
+
"several files",
|
|
172
|
+
"cross-service",
|
|
173
|
+
]
|
|
174
|
+
|
|
175
|
+
LOOKUP_TRIGGERS = [
|
|
176
|
+
"what is",
|
|
177
|
+
"what are",
|
|
178
|
+
"check",
|
|
179
|
+
"status",
|
|
180
|
+
"how does",
|
|
181
|
+
"where is",
|
|
182
|
+
"which",
|
|
183
|
+
"show me",
|
|
184
|
+
"list",
|
|
185
|
+
"describe",
|
|
186
|
+
"explain",
|
|
187
|
+
"verify",
|
|
188
|
+
"look up",
|
|
189
|
+
"find",
|
|
190
|
+
]
|
|
191
|
+
|
|
192
|
+
TRIVIAL_PATTERNS = {
|
|
193
|
+
"yes", "no", "done", "ok", "okay", "sure", "thanks",
|
|
194
|
+
"thank you", "continue", "proceed", "go", "next",
|
|
195
|
+
"hi", "hello", "hey", "good morning", "good evening",
|
|
196
|
+
"bye", "goodbye", "got it", "noted", "roger",
|
|
197
|
+
"yep", "nope", "ack", "k", "y", "n",
|
|
198
|
+
}
|
|
199
|
+
|
|
96
200
|
# ── Dimensions ────────────────────────────────────────────
|
|
97
201
|
# D and T are mandatory; R, A, F can be screened out when inapplicable.
|
|
98
202
|
|