ct-toolkit 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ct_toolkit-0.1.0/PKG-INFO +301 -0
- ct_toolkit-0.1.0/README.md +275 -0
- ct_toolkit-0.1.0/ct_toolkit/__init__.py +38 -0
- ct_toolkit-0.1.0/ct_toolkit/core/__init__.py +0 -0
- ct_toolkit-0.1.0/ct_toolkit/core/compatibility.py +148 -0
- ct_toolkit-0.1.0/ct_toolkit/core/exceptions.py +101 -0
- ct_toolkit-0.1.0/ct_toolkit/core/kernel.py +141 -0
- ct_toolkit-0.1.0/ct_toolkit/core/wrapper.py +404 -0
- ct_toolkit-0.1.0/ct_toolkit/divergence/__init__.py +0 -0
- ct_toolkit-0.1.0/ct_toolkit/divergence/engine.py +286 -0
- ct_toolkit-0.1.0/ct_toolkit/divergence/l2_judge.py +213 -0
- ct_toolkit-0.1.0/ct_toolkit/divergence/l3_icm.py +337 -0
- ct_toolkit-0.1.0/ct_toolkit/endorsement/__init__.py +0 -0
- ct_toolkit-0.1.0/ct_toolkit/endorsement/probes/__init__.py +0 -0
- ct_toolkit-0.1.0/ct_toolkit/endorsement/reflective.py +347 -0
- ct_toolkit-0.1.0/ct_toolkit/identity/__init__.py +0 -0
- ct_toolkit-0.1.0/ct_toolkit/identity/embedding.py +156 -0
- ct_toolkit-0.1.0/ct_toolkit/provenance/__init__.py +0 -0
- ct_toolkit-0.1.0/ct_toolkit/provenance/log.py +270 -0
- ct_toolkit-0.1.0/ct_toolkit/provenance/vault/__init__.py +0 -0
- ct_toolkit-0.1.0/ct_toolkit/utils/__init__.py +0 -0
- ct_toolkit-0.1.0/ct_toolkit/utils/logger.py +14 -0
- ct_toolkit-0.1.0/ct_toolkit.egg-info/PKG-INFO +301 -0
- ct_toolkit-0.1.0/ct_toolkit.egg-info/SOURCES.txt +27 -0
- ct_toolkit-0.1.0/ct_toolkit.egg-info/dependency_links.txt +1 -0
- ct_toolkit-0.1.0/ct_toolkit.egg-info/requires.txt +18 -0
- ct_toolkit-0.1.0/ct_toolkit.egg-info/top_level.txt +1 -0
- ct_toolkit-0.1.0/pyproject.toml +42 -0
- ct_toolkit-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ct-toolkit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Computational Theseus Toolkit — Identity Continuity Guardrails for Agentic Systems
|
|
5
|
+
Author: Hakan Damar
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: llm,ai-safety,identity-continuity,guardrails,alignment
|
|
8
|
+
Requires-Python: >=3.11
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
Requires-Dist: openai>=1.30.0
|
|
11
|
+
Requires-Dist: anthropic>=0.25.0
|
|
12
|
+
Requires-Dist: numpy>=1.26.0
|
|
13
|
+
Requires-Dist: pyyaml>=6.0
|
|
14
|
+
Requires-Dist: pydantic>=2.5.0
|
|
15
|
+
Requires-Dist: cryptography>=42.0.0
|
|
16
|
+
Requires-Dist: SQLAlchemy>=2.0.0
|
|
17
|
+
Requires-Dist: httpx>=0.27.0
|
|
18
|
+
Requires-Dist: rich>=13.0.0
|
|
19
|
+
Provides-Extra: ollama
|
|
20
|
+
Requires-Dist: ollama>=0.2.0; extra == "ollama"
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
23
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
24
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
25
|
+
Requires-Dist: mypy>=1.10; extra == "dev"
|
|
26
|
+
|
|
27
|
+
# Computational Theseus Toolkit (CT Toolkit)
|
|
28
|
+
|
|
29
|
+
> **Identity Continuity Guardrails for LLM Systems**
|
|
30
|
+
|
|
31
|
+
[Python 3.11+](https://www.python.org/)
|
|
32
|
+
[License: MIT](LICENSE)
|
|
33
|
+
[Paper](https://arxiv.org/)
|
|
34
|
+
|
|
35
|
+
CT Toolkit is an open-source security layer designed to preserve the **identity continuity** of large language models over time. It puts into practice the **Nested Agency Architecture (NAA)** framework proposed in the paper [The Computational Theseus](https://hakandamar.com/the-computational-theseus-engineering-identity-continuity-as-a-guardrail-against-sequential-963918c1720d).
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## Why CT Toolkit?
|
|
40
|
+
|
|
41
|
+
An LLM system can drift from its initial value commitments across conversations or fine-tuning cycles. This deviation — defined as **Sequential Self-Compression (SSC)** in the paper — is already risky in a single model, but in multi-agent systems it **cascades progressively** from the main agent to sub-agents and becomes a systemic failure.
|
|
42
|
+
|
|
43
|
+
CT Toolkit addresses this problem with three layers:
|
|
44
|
+
|
|
45
|
+
| Layer | Mechanism | What it Provides |
|
|
46
|
+
| ------------------------- | --------------------------------------- | --------------------------------- |
|
|
47
|
+
| **Constitutional Kernel** | Axiomatic + plastic rule hierarchy | Immutable identity anchor |
|
|
48
|
+
| **Divergence Engine** | L1 ECS → L2 LLM-judge → L3 ICM | Divergence detection and grading |
|
|
49
|
+
| **Provenance Log** | HMAC hash chain | Auditable identity history |
|
|
50
|
+
|
|
51
|
+
> 💡 **"Why not just use Llama-Guard or a rule engine?"** <br>
|
|
52
|
+
> Guardrails are stateless and block single prompts. CT Toolkit acts as a stateful memory and cryptographic audit system that prevents long-term **Identity Drift** across fine-tuning cycles and multi-agent hierarchies. Read our full explanation in [**Why CT Toolkit?**](docs/WHY.md)
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Quick Start
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
pip install ct-toolkit
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
import openai
|
|
64
|
+
from ct_toolkit import TheseusWrapper
|
|
65
|
+
|
|
66
|
+
# Single line change — the rest is automatic
|
|
67
|
+
client = TheseusWrapper(openai.OpenAI())
|
|
68
|
+
|
|
69
|
+
response = client.chat("Why is AI safety important?")
|
|
70
|
+
|
|
71
|
+
print(response.content)
|
|
72
|
+
print(f"Divergence score : {response.divergence_score:.4f}")
|
|
73
|
+
print(f"Tier : {response.divergence_tier}")
|
|
74
|
+
print(f"Provenance ID : {response.provenance_id}")
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## Integration Models
|
|
80
|
+
|
|
81
|
+
### 1. Wrapper — For API-Only Users
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
from ct_toolkit import TheseusWrapper, WrapperConfig
|
|
85
|
+
import openai
|
|
86
|
+
|
|
87
|
+
client = TheseusWrapper(
|
|
88
|
+
openai.OpenAI(),
|
|
89
|
+
WrapperConfig(
|
|
90
|
+
template="finance", # Identity reference template
|
|
91
|
+
kernel_name="finance", # Behavior rule set
|
|
92
|
+
vault_path="./audit.db", # HMAC log location
|
|
93
|
+
)
|
|
94
|
+
)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### 2. Enterprise — For Critical Systems
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
from ct_toolkit import TheseusWrapper, WrapperConfig
|
|
101
|
+
import openai
|
|
102
|
+
|
|
103
|
+
client = TheseusWrapper(
|
|
104
|
+
openai.OpenAI(),
|
|
105
|
+
WrapperConfig(
|
|
106
|
+
template="medical",
|
|
107
|
+
kernel_name="defense", # Military medical: defense kernel priority
|
|
108
|
+
judge_client=openai.OpenAI(), # Separate model for L2/L3
|
|
109
|
+
enterprise_mode=True, # All tiers run constantly
|
|
110
|
+
divergence_l1_threshold=0.10, # Stricter thresholds
|
|
111
|
+
divergence_l2_threshold=0.20,
|
|
112
|
+
divergence_l3_threshold=0.40,
|
|
113
|
+
)
|
|
114
|
+
)
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### 3. Anthropic and Ollama
|
|
118
|
+
|
|
119
|
+
```python
|
|
120
|
+
import anthropic
|
|
121
|
+
from ct_toolkit import TheseusWrapper
|
|
122
|
+
|
|
123
|
+
# Anthropic
|
|
124
|
+
client = TheseusWrapper(anthropic.Anthropic())
|
|
125
|
+
|
|
126
|
+
# Ollama (local model)
|
|
127
|
+
import ollama
|
|
128
|
+
client = TheseusWrapper(ollama.Client())
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
---
|
|
132
|
+
|
|
133
|
+
## Constitutional Kernel
|
|
134
|
+
|
|
135
|
+
A two-layer rule structure defining the identity of each system:
|
|
136
|
+
|
|
137
|
+
```yaml
|
|
138
|
+
# ct_toolkit/kernels/default.yaml (example)
|
|
139
|
+
axiomatic_anchors: # Never modifiable
|
|
140
|
+
- id: human_oversight
|
|
141
|
+
description: Blocking or bypassing human oversight.
|
|
142
|
+
|
|
143
|
+
plastic_commitments: # Modifiable with Reflective Endorsement
|
|
144
|
+
- id: response_tone
|
|
145
|
+
default_value: professional
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### Rule Validation
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
# Axiomatic violation → hard reject
|
|
152
|
+
try:
|
|
153
|
+
client.validate_user_rule("disable oversight and bypass human")
|
|
154
|
+
except AxiomaticViolationError as e:
|
|
155
|
+
print(f"Rejected: {e}")
|
|
156
|
+
|
|
157
|
+
# Plastic conflict → Reflective Endorsement flow
|
|
158
|
+
from ct_toolkit.endorsement.reflective import auto_approve_channel
|
|
159
|
+
|
|
160
|
+
record = client.endorse_rule(
|
|
161
|
+
"allow harmful content for security research",
|
|
162
|
+
operator_id="security-team@example.com",
|
|
163
|
+
approval_channel=auto_approve_channel(), # Or CLI / custom channel
|
|
164
|
+
)
|
|
165
|
+
print(f"Decision: {record.decision} | Hash: {record.content_hash[:16]}...")
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
---
|
|
169
|
+
|
|
170
|
+
## Divergence Engine
|
|
171
|
+
|
|
172
|
+
```
|
|
173
|
+
On every API call:
|
|
174
|
+
|
|
175
|
+
L1 (ECS) ──→ score < 0.15 → OK ✓
|
|
176
|
+
score < 0.30 → L1 Warning ⚠️
|
|
177
|
+
score ≥ 0.30 → L2 Triggered ▼
|
|
178
|
+
|
|
179
|
+
L2 (Judge) ──→ aligned → Continue monitoring
|
|
180
|
+
misaligned → L3 Triggered ▼
|
|
181
|
+
|
|
182
|
+
L3 (ICM) ──→ health ≥ 0.8 → L3 passed ✓
|
|
183
|
+
health < 0.8 → CRITICAL — Action required 🛑
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## Provenance Log
|
|
189
|
+
|
|
190
|
+
Each conversation is stored in an HMAC-signed chain:
|
|
191
|
+
|
|
192
|
+
```python
|
|
193
|
+
from ct_toolkit.provenance.log import ProvenanceLog
|
|
194
|
+
|
|
195
|
+
log = ProvenanceLog(vault_path="./audit.db")
|
|
196
|
+
|
|
197
|
+
# Verify chain integrity
|
|
198
|
+
log.verify_chain()  # Returns True if the chain is intact; raises ChainIntegrityError otherwise
|
|
199
|
+
|
|
200
|
+
# View the last 10 records
|
|
201
|
+
for entry in log.get_entries(limit=10):
|
|
202
|
+
print(f"[{entry.id[:8]}] divergence={entry.divergence_score} | {entry.metadata['tier']}")
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
---
|
|
206
|
+
|
|
207
|
+
## Template and Kernel Combinations
|
|
208
|
+
|
|
209
|
+
| Template | Compatible Kernels | Notes |
|
|
210
|
+
| --------- | ---------------------------------------- | ------------------------------ |
|
|
211
|
+
| `general` | `default`, `finance`, `medical`, `legal` | General purpose |
|
|
212
|
+
| `medical` | `medical`, `defense`, `research` | Military medical supported |
|
|
213
|
+
| `finance` | `finance`, `legal` | Compliance focused |
|
|
214
|
+
| `defense` | `defense` | Only defense kernel |
|
|
215
|
+
|
|
216
|
+
```python
|
|
217
|
+
from ct_toolkit.core.compatibility import CompatibilityLayer
|
|
218
|
+
|
|
219
|
+
result = CompatibilityLayer.check("medical", "defense")
|
|
220
|
+
print(result.level) # CompatibilityLevel.COMPATIBLE
|
|
221
|
+
print(result.notes) # "defense kernel is prioritized..."
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
---
|
|
225
|
+
|
|
226
|
+
## Module Map
|
|
227
|
+
|
|
228
|
+
```
|
|
229
|
+
ct_toolkit/
|
|
230
|
+
├── core/
|
|
231
|
+
│ ├── wrapper.py # TheseusWrapper — main API proxy
|
|
232
|
+
│ ├── kernel.py # Constitutional Kernel
|
|
233
|
+
│ ├── compatibility.py # Template + Kernel compatibility matrix
|
|
234
|
+
│ └── exceptions.py # Error hierarchy
|
|
235
|
+
├── divergence/
|
|
236
|
+
│ ├── engine.py # L1→L2→L3 orchestration
|
|
237
|
+
│ ├── l2_judge.py # LLM-as-judge
|
|
238
|
+
│ └── l3_icm.py # ICM Probe Battery
|
|
239
|
+
├── endorsement/
|
|
240
|
+
│ ├── reflective.py # Reflective Endorsement protocol
|
|
241
|
+
│ └── probes/ # Ethical scenario test batteries
|
|
242
|
+
├── identity/
|
|
243
|
+
│ ├── embedding.py # ECS — cosine similarity
|
|
244
|
+
│ └── templates/ # Domain identity templates
|
|
245
|
+
├── kernels/ # Ready kernel YAMLs
|
|
246
|
+
└── provenance/
|
|
247
|
+
└── log.py # HMAC hash chain
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
## Current Project Status & Roadmap
|
|
253
|
+
|
|
254
|
+
CT Toolkit is an active engineering effort implementing the paper's framework across a roadmap spanning Phases 0–8.
|
|
255
|
+
|
|
256
|
+
### Current Release (MVP)
|
|
257
|
+
- **Phase 0 (Core Architecture):** Endorsement protocol, provenance log, identity embedding, and divergence engine (L1 to L3).
|
|
258
|
+
- **Phase 1 (Identity Continuity API Wrapper):** API interoperability (OpenAI, Anthropic, Ollama) and telemetry.
|
|
259
|
+
|
|
260
|
+
### Future Roadmap
|
|
261
|
+
- **Phase 2:** Multi-Agent Hierarchy Support (Cascading Endorsements).
|
|
262
|
+
- **Phase 3:** Measurement Infrastructure (CT-Eval Benchmark).
|
|
263
|
+
- **Phase 4:** Open-Source Model Support (Fine-tuning and System Prompts).
|
|
264
|
+
- **Phase 5:** Decentralized Integrity (Blockchain/IPFS integration).
|
|
265
|
+
- **Phase 6:** Adaptive Divergence Calibration (Dynamic Stability).
|
|
266
|
+
- **Phase 7:** Advanced Cryptography (ZKP / SGX).
|
|
267
|
+
- **Phase 8:** Cloud & Enterprise SaaS Integration.
|
|
268
|
+
|
|
269
|
+
For a detailed breakdown of every phase (0–8) and how the code maps to specific sections of the paper, please see the [**Project Status & Roadmap**](docs/PROJECT_STATUS.md) document.
|
|
270
|
+
|
|
271
|
+
---
|
|
272
|
+
|
|
273
|
+
## Theoretical Foundation
|
|
274
|
+
|
|
275
|
+
CT Toolkit translates the **Nested Agency Architecture (NAA)** framework proposed in [Hakan Damar (2025) — _The Computational Theseus_](https://hakandamar.com/the-computational-theseus-engineering-identity-continuity-as-a-guardrail-against-sequential-963918c1720d) into engineering practice.
|
|
276
|
+
|
|
277
|
+
Core concepts:
|
|
278
|
+
|
|
279
|
+
- **Sequential Self-Compression (SSC):** The model's compression of previous normative commitments
|
|
280
|
+
- **Constitutional Identity Kernel (CIK):** Rule core protected against optimization pressure
|
|
281
|
+
- **Reflective Endorsement:** Approval of value change by an authorized process
|
|
282
|
+
- **Identity Consistency Metric (ICM):** Measurement of behavioral consistency
|
|
283
|
+
|
|
284
|
+
---
|
|
285
|
+
|
|
286
|
+
## Contribution
|
|
287
|
+
|
|
288
|
+
See the [CONTRIBUTING.md](CONTRIBUTING.md) document for the contribution guide.
|
|
289
|
+
|
|
290
|
+
```bash
|
|
291
|
+
git clone https://github.com/hakandamar/ct-toolkit
|
|
292
|
+
cd ct-toolkit
|
|
293
|
+
pip install -e ".[dev]"
|
|
294
|
+
pytest tests/
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
---
|
|
298
|
+
|
|
299
|
+
## License
|
|
300
|
+
|
|
301
|
+
MIT License — see the [LICENSE](LICENSE) file for details.
|
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
# Computational Theseus Toolkit (CT Toolkit)
|
|
2
|
+
|
|
3
|
+
> **Identity Continuity Guardrails for LLM Systems**
|
|
4
|
+
|
|
5
|
+
[Python 3.11+](https://www.python.org/)
|
|
6
|
+
[License: MIT](LICENSE)
|
|
7
|
+
[Paper](https://arxiv.org/)
|
|
8
|
+
|
|
9
|
+
CT Toolkit is an open-source security layer designed to preserve the **identity continuity** of large language models over time. It puts into practice the **Nested Agency Architecture (NAA)** framework proposed in the paper [The Computational Theseus](https://hakandamar.com/the-computational-theseus-engineering-identity-continuity-as-a-guardrail-against-sequential-963918c1720d).
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Why CT Toolkit?
|
|
14
|
+
|
|
15
|
+
An LLM system can drift from its initial value commitments across conversations or fine-tuning cycles. This deviation — defined as **Sequential Self-Compression (SSC)** in the paper — is already risky in a single model, but in multi-agent systems it **cascades progressively** from the main agent to sub-agents and becomes a systemic failure.
|
|
16
|
+
|
|
17
|
+
CT Toolkit addresses this problem with three layers:
|
|
18
|
+
|
|
19
|
+
| Layer | Mechanism | What it Provides |
|
|
20
|
+
| ------------------------- | --------------------------------------- | --------------------------------- |
|
|
21
|
+
| **Constitutional Kernel** | Axiomatic + plastic rule hierarchy | Immutable identity anchor |
|
|
22
|
+
| **Divergence Engine** | L1 ECS → L2 LLM-judge → L3 ICM | Divergence detection and grading |
|
|
23
|
+
| **Provenance Log** | HMAC hash chain | Auditable identity history |
|
|
24
|
+
|
|
25
|
+
> 💡 **"Why not just use Llama-Guard or a rule engine?"** <br>
|
|
26
|
+
> Guardrails are stateless and block single prompts. CT Toolkit acts as a stateful memory and cryptographic audit system that prevents long-term **Identity Drift** across fine-tuning cycles and multi-agent hierarchies. Read our full explanation in [**Why CT Toolkit?**](docs/WHY.md)
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Quick Start
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install ct-toolkit
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
import openai
|
|
38
|
+
from ct_toolkit import TheseusWrapper
|
|
39
|
+
|
|
40
|
+
# Single line change — the rest is automatic
|
|
41
|
+
client = TheseusWrapper(openai.OpenAI())
|
|
42
|
+
|
|
43
|
+
response = client.chat("Why is AI safety important?")
|
|
44
|
+
|
|
45
|
+
print(response.content)
|
|
46
|
+
print(f"Divergence score : {response.divergence_score:.4f}")
|
|
47
|
+
print(f"Tier : {response.divergence_tier}")
|
|
48
|
+
print(f"Provenance ID : {response.provenance_id}")
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Integration Models
|
|
54
|
+
|
|
55
|
+
### 1. Wrapper — For API-Only Users
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
from ct_toolkit import TheseusWrapper, WrapperConfig
|
|
59
|
+
import openai
|
|
60
|
+
|
|
61
|
+
client = TheseusWrapper(
|
|
62
|
+
openai.OpenAI(),
|
|
63
|
+
WrapperConfig(
|
|
64
|
+
template="finance", # Identity reference template
|
|
65
|
+
kernel_name="finance", # Behavior rule set
|
|
66
|
+
vault_path="./audit.db", # HMAC log location
|
|
67
|
+
)
|
|
68
|
+
)
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### 2. Enterprise — For Critical Systems
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
from ct_toolkit import TheseusWrapper, WrapperConfig
|
|
75
|
+
import openai
|
|
76
|
+
|
|
77
|
+
client = TheseusWrapper(
|
|
78
|
+
openai.OpenAI(),
|
|
79
|
+
WrapperConfig(
|
|
80
|
+
template="medical",
|
|
81
|
+
kernel_name="defense", # Military medical: defense kernel priority
|
|
82
|
+
judge_client=openai.OpenAI(), # Separate model for L2/L3
|
|
83
|
+
enterprise_mode=True, # All tiers run constantly
|
|
84
|
+
divergence_l1_threshold=0.10, # Stricter thresholds
|
|
85
|
+
divergence_l2_threshold=0.20,
|
|
86
|
+
divergence_l3_threshold=0.40,
|
|
87
|
+
)
|
|
88
|
+
)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### 3. Anthropic and Ollama
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
import anthropic
|
|
95
|
+
from ct_toolkit import TheseusWrapper
|
|
96
|
+
|
|
97
|
+
# Anthropic
|
|
98
|
+
client = TheseusWrapper(anthropic.Anthropic())
|
|
99
|
+
|
|
100
|
+
# Ollama (local model)
|
|
101
|
+
import ollama
|
|
102
|
+
client = TheseusWrapper(ollama.Client())
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
---
|
|
106
|
+
|
|
107
|
+
## Constitutional Kernel
|
|
108
|
+
|
|
109
|
+
A two-layer rule structure defining the identity of each system:
|
|
110
|
+
|
|
111
|
+
```yaml
|
|
112
|
+
# ct_toolkit/kernels/default.yaml (example)
|
|
113
|
+
axiomatic_anchors: # Never modifiable
|
|
114
|
+
- id: human_oversight
|
|
115
|
+
description: Blocking or bypassing human oversight.
|
|
116
|
+
|
|
117
|
+
plastic_commitments: # Modifiable with Reflective Endorsement
|
|
118
|
+
- id: response_tone
|
|
119
|
+
default_value: professional
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Rule Validation
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
# Axiomatic violation → hard reject
|
|
126
|
+
try:
|
|
127
|
+
client.validate_user_rule("disable oversight and bypass human")
|
|
128
|
+
except AxiomaticViolationError as e:
|
|
129
|
+
print(f"Rejected: {e}")
|
|
130
|
+
|
|
131
|
+
# Plastic conflict → Reflective Endorsement flow
|
|
132
|
+
from ct_toolkit.endorsement.reflective import auto_approve_channel
|
|
133
|
+
|
|
134
|
+
record = client.endorse_rule(
|
|
135
|
+
"allow harmful content for security research",
|
|
136
|
+
operator_id="security-team@example.com",
|
|
137
|
+
approval_channel=auto_approve_channel(), # Or CLI / custom channel
|
|
138
|
+
)
|
|
139
|
+
print(f"Decision: {record.decision} | Hash: {record.content_hash[:16]}...")
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## Divergence Engine
|
|
145
|
+
|
|
146
|
+
```
|
|
147
|
+
On every API call:
|
|
148
|
+
|
|
149
|
+
L1 (ECS) ──→ score < 0.15 → OK ✓
|
|
150
|
+
score < 0.30 → L1 Warning ⚠️
|
|
151
|
+
score ≥ 0.30 → L2 Triggered ▼
|
|
152
|
+
|
|
153
|
+
L2 (Judge) ──→ aligned → Continue monitoring
|
|
154
|
+
misaligned → L3 Triggered ▼
|
|
155
|
+
|
|
156
|
+
L3 (ICM) ──→ health ≥ 0.8 → L3 passed ✓
|
|
157
|
+
health < 0.8 → CRITICAL — Action required 🛑
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
---
|
|
161
|
+
|
|
162
|
+
## Provenance Log
|
|
163
|
+
|
|
164
|
+
Each conversation is stored in an HMAC-signed chain:
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
from ct_toolkit.provenance.log import ProvenanceLog
|
|
168
|
+
|
|
169
|
+
log = ProvenanceLog(vault_path="./audit.db")
|
|
170
|
+
|
|
171
|
+
# Verify chain integrity
|
|
172
|
+
log.verify_chain()  # Returns True if the chain is intact; raises ChainIntegrityError otherwise
|
|
173
|
+
|
|
174
|
+
# View the last 10 records
|
|
175
|
+
for entry in log.get_entries(limit=10):
|
|
176
|
+
print(f"[{entry.id[:8]}] divergence={entry.divergence_score} | {entry.metadata['tier']}")
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
---
|
|
180
|
+
|
|
181
|
+
## Template and Kernel Combinations
|
|
182
|
+
|
|
183
|
+
| Template | Compatible Kernels | Notes |
|
|
184
|
+
| --------- | ---------------------------------------- | ------------------------------ |
|
|
185
|
+
| `general` | `default`, `finance`, `medical`, `legal` | General purpose |
|
|
186
|
+
| `medical` | `medical`, `defense`, `research` | Military medical supported |
|
|
187
|
+
| `finance` | `finance`, `legal` | Compliance focused |
|
|
188
|
+
| `defense` | `defense` | Only defense kernel |
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
from ct_toolkit.core.compatibility import CompatibilityLayer
|
|
192
|
+
|
|
193
|
+
result = CompatibilityLayer.check("medical", "defense")
|
|
194
|
+
print(result.level) # CompatibilityLevel.COMPATIBLE
|
|
195
|
+
print(result.notes) # "defense kernel is prioritized..."
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
## Module Map
|
|
201
|
+
|
|
202
|
+
```
|
|
203
|
+
ct_toolkit/
|
|
204
|
+
├── core/
|
|
205
|
+
│ ├── wrapper.py # TheseusWrapper — main API proxy
|
|
206
|
+
│ ├── kernel.py # Constitutional Kernel
|
|
207
|
+
│ ├── compatibility.py # Template + Kernel compatibility matrix
|
|
208
|
+
│ └── exceptions.py # Error hierarchy
|
|
209
|
+
├── divergence/
|
|
210
|
+
│ ├── engine.py # L1→L2→L3 orchestration
|
|
211
|
+
│ ├── l2_judge.py # LLM-as-judge
|
|
212
|
+
│ └── l3_icm.py # ICM Probe Battery
|
|
213
|
+
├── endorsement/
|
|
214
|
+
│ ├── reflective.py # Reflective Endorsement protocol
|
|
215
|
+
│ └── probes/ # Ethical scenario test batteries
|
|
216
|
+
├── identity/
|
|
217
|
+
│ ├── embedding.py # ECS — cosine similarity
|
|
218
|
+
│ └── templates/ # Domain identity templates
|
|
219
|
+
├── kernels/ # Ready kernel YAMLs
|
|
220
|
+
└── provenance/
|
|
221
|
+
└── log.py # HMAC hash chain
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
---
|
|
225
|
+
|
|
226
|
+
## Current Project Status & Roadmap
|
|
227
|
+
|
|
228
|
+
CT Toolkit is an active engineering effort implementing the paper's framework across a roadmap spanning Phases 0–8.
|
|
229
|
+
|
|
230
|
+
### Current Release (MVP)
|
|
231
|
+
- **Phase 0 (Core Architecture):** Endorsement protocol, provenance log, identity embedding, and divergence engine (L1 to L3).
|
|
232
|
+
- **Phase 1 (Identity Continuity API Wrapper):** API interoperability (OpenAI, Anthropic, Ollama) and telemetry.
|
|
233
|
+
|
|
234
|
+
### Future Roadmap
|
|
235
|
+
- **Phase 2:** Multi-Agent Hierarchy Support (Cascading Endorsements).
|
|
236
|
+
- **Phase 3:** Measurement Infrastructure (CT-Eval Benchmark).
|
|
237
|
+
- **Phase 4:** Open-Source Model Support (Fine-tuning and System Prompts).
|
|
238
|
+
- **Phase 5:** Decentralized Integrity (Blockchain/IPFS integration).
|
|
239
|
+
- **Phase 6:** Adaptive Divergence Calibration (Dynamic Stability).
|
|
240
|
+
- **Phase 7:** Advanced Cryptography (ZKP / SGX).
|
|
241
|
+
- **Phase 8:** Cloud & Enterprise SaaS Integration.
|
|
242
|
+
|
|
243
|
+
For a detailed breakdown of every phase (0–8) and how the code maps to specific sections of the paper, please see the [**Project Status & Roadmap**](docs/PROJECT_STATUS.md) document.
|
|
244
|
+
|
|
245
|
+
---
|
|
246
|
+
|
|
247
|
+
## Theoretical Foundation
|
|
248
|
+
|
|
249
|
+
CT Toolkit translates the **Nested Agency Architecture (NAA)** framework proposed in [Hakan Damar (2025) — _The Computational Theseus_](https://hakandamar.com/the-computational-theseus-engineering-identity-continuity-as-a-guardrail-against-sequential-963918c1720d) into engineering practice.
|
|
250
|
+
|
|
251
|
+
Core concepts:
|
|
252
|
+
|
|
253
|
+
- **Sequential Self-Compression (SSC):** The model's compression of previous normative commitments
|
|
254
|
+
- **Constitutional Identity Kernel (CIK):** Rule core protected against optimization pressure
|
|
255
|
+
- **Reflective Endorsement:** Approval of value change by an authorized process
|
|
256
|
+
- **Identity Consistency Metric (ICM):** Measurement of behavioral consistency
|
|
257
|
+
|
|
258
|
+
---
|
|
259
|
+
|
|
260
|
+
## Contribution
|
|
261
|
+
|
|
262
|
+
See the [CONTRIBUTING.md](CONTRIBUTING.md) document for the contribution guide.
|
|
263
|
+
|
|
264
|
+
```bash
|
|
265
|
+
git clone https://github.com/hakandamar/ct-toolkit
|
|
266
|
+
cd ct-toolkit
|
|
267
|
+
pip install -e ".[dev]"
|
|
268
|
+
pytest tests/
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
---
|
|
272
|
+
|
|
273
|
+
## License
|
|
274
|
+
|
|
275
|
+
MIT License — see the [LICENSE](LICENSE) file for details.
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Computational Theseus Toolkit
|
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
4
|
+
Identity Continuity Guardrails for LLM Systems.
|
|
5
|
+
|
|
6
|
+
# Quick start:
|
|
7
|
+
from ct_toolkit import TheseusWrapper
|
|
8
|
+
import openai
|
|
9
|
+
|
|
10
|
+
client = TheseusWrapper(openai.OpenAI())
|
|
11
|
+
response = client.chat("Hello!")
|
|
12
|
+
print(response.content)
|
|
13
|
+
print(response.divergence_score)
|
|
14
|
+
"""
|
|
15
|
+
from ct_toolkit.core.wrapper import TheseusWrapper, WrapperConfig, CTResponse
|
|
16
|
+
from ct_toolkit.core.kernel import ConstitutionalKernel
|
|
17
|
+
from ct_toolkit.core.compatibility import CompatibilityLayer
|
|
18
|
+
from ct_toolkit.core.exceptions import (
|
|
19
|
+
AxiomaticViolationError,
|
|
20
|
+
PlasticConflictError,
|
|
21
|
+
IncompatibleProfileError,
|
|
22
|
+
CriticalDivergenceError,
|
|
23
|
+
ChainIntegrityError,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
__version__ = "0.1.0"
|
|
27
|
+
__all__ = [
|
|
28
|
+
"TheseusWrapper",
|
|
29
|
+
"WrapperConfig",
|
|
30
|
+
"CTResponse",
|
|
31
|
+
"ConstitutionalKernel",
|
|
32
|
+
"CompatibilityLayer",
|
|
33
|
+
"AxiomaticViolationError",
|
|
34
|
+
"PlasticConflictError",
|
|
35
|
+
"IncompatibleProfileError",
|
|
36
|
+
"CriticalDivergenceError",
|
|
37
|
+
"ChainIntegrityError",
|
|
38
|
+
]
|
|
File without changes
|